File rl-9.9.2p1.patch of Package bind.openSUSE_Evergreen_11.4

diff -r -u bin/named/client.c-orig bin/named/client.c
--- bin/named/client.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/client.c	2004-01-01 00:00:00.000000000 +0000
@@ -994,6 +994,11 @@
 	}
 	if (result != ISC_R_SUCCESS)
 		goto done;
+	/*
+	 * Stop after the question if TC was set for rate limiting.
+	 */
+	if ((client->message->flags & DNS_MESSAGEFLAG_TC) != 0)
+		goto renderend;
 	result = dns_message_rendersection(client->message,
 					   DNS_SECTION_ANSWER,
 					   DNS_MESSAGERENDER_PARTIAL |
@@ -1134,6 +1139,49 @@
 #endif
 
 	/*
+	 * Try to rate limit error responses.
+	 */
+	if (client->view != NULL && client->view->rrl != NULL) {
+		isc_boolean_t wouldlog;
+		char log_buf[DNS_RRL_LOG_BUF_LEN];
+		dns_rrl_result_t rrl_result;
+
+		INSIST(rcode != dns_rcode_noerror &&
+		       rcode != dns_rcode_nxdomain);
+		wouldlog = (ns_g_server->log_queries &&
+			    isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP));
+		rrl_result = dns_rrl(client->view, &client->peeraddr,
+				     TCP_CLIENT(client),
+				     dns_rdataclass_in, dns_rdatatype_none,
+				     NULL, rcode, client->now,
+				     wouldlog, log_buf, sizeof(log_buf));
+		if (rrl_result != DNS_RRL_RESULT_OK) {
+			/*
+			 * Log dropped errors in the query category
+			 * so that they are not lost in silence.
+			 * Starts of rate-limited bursts are logged in
+			 * NS_LOGCATEGORY_RRL.
+			 */
+			if (wouldlog) {
+				ns_client_log(client, NS_LOGCATEGORY_QUERIES,
+					      NS_LOGMODULE_CLIENT,
+					      DNS_RRL_LOG_DROP,
+					      "%s", log_buf);
+			}
+			/*
+			 * Some error responses cannot be 'slipped',
+			 * so don't try.
+			 * This will counted with dropped queries in the
+			 * QryDropped counter.
+			 */
+			if (!client->view->rrl->log_only) {
+				ns_client_next(client, DNS_R_DROP);
+				return;
+			}
+		}
+	}
+
+	/*
 	 * Message may be an in-progress reply that we had trouble
 	 * with, in which case QR will be set.  We need to clear QR before
 	 * calling dns_message_reply() to avoid triggering an assertion.
diff -r -u bin/named/config.c-orig bin/named/config.c
--- bin/named/config.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/config.c	2004-01-01 00:00:00.000000000 +0000
@@ -227,6 +227,17 @@
 	notify no;\n\
 	allow-new-zones no;\n\
 \n\
+	# Prevent use of this zone in DNS amplified reflection DoS attacks\n\
+	# Notice the size of the authors.bind response.\n\
+	rate-limit {\n\
+		responses-per-second 1;\n\
+		window 10;\n\
+		slip 0;\n\
+		IPv4-prefix-length 16;\n\
+		IPv6-prefix-length 32;\n\
+		min-table-size 10;\n\
+	};\n\
+\n\
 	zone \"version.bind\" chaos {\n\
 		type master;\n\
 		database \"_builtin version\";\n\
diff -r -u bin/named/include/named/query.h-orig bin/named/include/named/query.h
--- bin/named/include/named/query.h-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/include/named/query.h	2004-01-01 00:00:00.000000000 +0000
@@ -85,6 +85,7 @@
 #define NS_QUERYATTR_CACHEACLOK		0x2000
 #define NS_QUERYATTR_DNS64		0x4000
 #define NS_QUERYATTR_DNS64EXCLUDE	0x8000
+#define NS_QUERYATTR_RRL_CHECKED	0x10000
 
 
 isc_result_t
diff -r -u bin/named/include/named/server.h-orig bin/named/include/named/server.h
--- bin/named/include/named/server.h-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/include/named/server.h	2004-01-01 00:00:00.000000000 +0000
@@ -165,7 +165,10 @@
 	dns_nsstatscounter_updatefail = 34,
 	dns_nsstatscounter_updatebadprereq = 35,
 
-	dns_nsstatscounter_max = 36
+	dns_nsstatscounter_ratedropped = 36,
+	dns_nsstatscounter_rateslipped = 37,
+
+	dns_nsstatscounter_max = 38
 };
 
 void
diff -r -u bin/named/query.c-orig bin/named/query.c
--- bin/named/query.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/query.c	2004-01-01 00:00:00.000000000 +0000
@@ -5748,6 +5748,104 @@
  resume:
 	CTRACE("query_find: resume");
 
+	/*
+	 * Rate limit these responses to this client.
+	 */
+	if (client->view->rrl != NULL &&
+	    fname != NULL && dns_name_isabsolute(fname) &&
+	    (client->query.attributes & NS_QUERYATTR_RRL_CHECKED) == 0) {
+		dns_rdataset_t nc_rdataset;
+		dns_rcode_t rcode;
+		isc_boolean_t wouldlog;
+		char log_buf[DNS_RRL_LOG_BUF_LEN];
+		isc_result_t nc_result;
+		dns_rrl_result_t rrl_result;
+
+		client->query.attributes |= NS_QUERYATTR_RRL_CHECKED;
+
+		wouldlog = isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP);
+		tname = fname;
+		if (result == DNS_R_NXDOMAIN) {
+			/*
+			 * Use the database origin name to rate limit NXDOMAIN
+			 */
+			if (db != NULL)
+				tname = dns_db_origin(db);
+			rcode = dns_rcode_nxdomain;
+		} else if (result == DNS_R_NCACHENXDOMAIN &&
+			   rdataset != NULL &&
+			   dns_rdataset_isassociated(rdataset) &&
+			   (rdataset->attributes &
+			    DNS_RDATASETATTR_NEGATIVE) != 0) {
+			/*
+			 * Try to use owner name in the negative cache SOA.
+			 */
+			dns_fixedname_init(&fixed);
+			dns_rdataset_init(&nc_rdataset);
+			for (nc_result = dns_rdataset_first(rdataset);
+			     nc_result == ISC_R_SUCCESS;
+			     nc_result = dns_rdataset_next(rdataset)) {
+				dns_ncache_current(rdataset,
+						   dns_fixedname_name(&fixed),
+						   &nc_rdataset);
+				if (nc_rdataset.type == dns_rdatatype_soa) {
+					dns_rdataset_disassociate(&nc_rdataset);
+					tname = dns_fixedname_name(&fixed);
+					break;
+				}
+				dns_rdataset_disassociate(&nc_rdataset);
+			}
+			rcode = dns_rcode_nxdomain;
+		} else {
+			rcode = dns_rcode_noerror;
+		}
+		rrl_result = dns_rrl(client->view, &client->peeraddr,
+				     ISC_TF((client->attributes
+					     & NS_CLIENTATTR_TCP) != 0),
+				     client->message->rdclass, qtype, tname,
+				     rcode, client->now,
+				     wouldlog, log_buf, sizeof(log_buf));
+		if (rrl_result != DNS_RRL_RESULT_OK) {
+			/*
+			 * Log dropped or slipped responses in the query
+			 * category so that requests are not silently lost.
+			 * Starts of rate-limited bursts are logged in
+			 * DNS_LOGCATEGORY_RRL.
+			 *
+			 * Dropped responses are counted with dropped queries
+			 * in QryDropped while slipped responses are counted
+			 * with other truncated responses in RespTruncated.
+			 */
+			if (wouldlog && ns_g_server->log_queries) {
+				ns_client_log(client, NS_LOGCATEGORY_QUERIES,
+					      NS_LOGMODULE_CLIENT,
+					      DNS_RRL_LOG_DROP,
+					      "%s", log_buf);
+			}
+			if (!client->view->rrl->log_only) {
+				if (rrl_result == DNS_RRL_RESULT_DROP) {
+					/*
+					 * These will also be counted in
+					 * dns_nsstatscounter_dropped
+					 */
+					inc_stats(client,
+						dns_nsstatscounter_ratedropped);
+					QUERY_ERROR(DNS_R_DROP);
+				} else {
+					/*
+					 * These will also be counted in
+					 * dns_nsstatscounter_truncatedresp
+					 */
+					inc_stats(client,
+						dns_nsstatscounter_rateslipped);
+					client->message->flags |=
+						DNS_MESSAGEFLAG_TC;
+				}
+				goto cleanup;
+			}
+		}
+	}
+
 	if (!ISC_LIST_EMPTY(client->view->rpz_zones) &&
 	    (RECURSIONOK(client) || !client->view->rpz_recursive_only) &&
 	    rpz_ck_dnssec(client, result, rdataset, sigrdataset) &&
@@ -7170,12 +7268,14 @@
 	}
 
 	if (eresult != ISC_R_SUCCESS &&
-	    (!PARTIALANSWER(client) || WANTRECURSION(client))) {
+	    (!PARTIALANSWER(client) || WANTRECURSION(client)
+	     || eresult == DNS_R_DROP)) {
 		if (eresult == DNS_R_DUPLICATE || eresult == DNS_R_DROP) {
 			/*
 			 * This was a duplicate query that we are
-			 * recursing on.  Don't send a response now.
-			 * The original query will still cause a response.
+			 * recursing on or the result of rate limiting.
+			 * Don't send a response now for a duplicate query,
+			 * because the original will still cause a response.
 			 */
 			query_next(client, eresult);
 		} else {
diff -r -u bin/named/server.c-orig bin/named/server.c
--- bin/named/server.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/server.c	2004-01-01 00:00:00.000000000 +0000
@@ -1561,6 +1561,201 @@
 	return (result);
 }
 
+#define CHECK_RRL(obj, cond, pat, val)					\
+	do {								\
+		if (!(cond)) {						\
+			cfg_obj_log(obj, ns_g_lctx, ISC_LOG_ERROR,	\
+				    pat, val);				\
+			result = ISC_R_RANGE;				\
+			goto cleanup;					\
+		    }							\
+	} while (0)
+
+static isc_result_t
+configure_rrl(dns_view_t *view, const cfg_obj_t *config, const cfg_obj_t *map) {
+	const cfg_obj_t *obj;
+	dns_rrl_t *rrl;
+	isc_result_t result;
+ 	int min_entries, i, j;
+
+	/*
+	 * Most DNS servers have few clients, but intentinally open
+	 * recursive and authoritative servers often have many.
+	 * So start with a small number of entries unless told otherwise
+	 * to reduce cold-start costs.
+	 */
+	min_entries = 1000;
+	obj = NULL;
+	result = cfg_map_get(map, "min-table-size", &obj);
+	if (result == ISC_R_SUCCESS) {
+		min_entries = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, min_entries > 1,
+			  "invalid '{min-table-size %d;}'", min_entries);
+	}
+	result = dns_rrl_init(&rrl, view, min_entries);
+	if (result != ISC_R_SUCCESS)
+		return (result);
+
+	i = ISC_MAX(10000, min_entries);
+	obj = NULL;
+	result = cfg_map_get(map, "max-table-size", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= min_entries,
+			  "invalid '{max-table-size %d;}'", i);
+	}
+	rrl->max_entries = i;
+
+	obj = NULL;
+	result = cfg_map_get(map, "responses-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE,
+			  "invalid '{responses-per-second %d;}'", i);
+	}
+	rrl->responses_per_second = i;
+	rrl->scaled_responses_per_second = rrl->responses_per_second;
+
+	/*
+	 * The default error rate is the response rate,
+	 * and so off by default.
+	 */
+	i = rrl->responses_per_second;
+	obj = NULL;
+	result = cfg_map_get(map, "errors-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE,
+			  "invalid '{errors-per-second %d;}'", i);
+	}
+	rrl->errors_per_second = i;
+	rrl->scaled_errors_per_second = rrl->errors_per_second;
+	/*
+	 * The default NXDOMAIN rate is the response rate,
+	 * and so off by default.
+	 */
+	i = rrl->responses_per_second;
+	obj = NULL;
+	result = cfg_map_get(map, "nxdomains-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE,
+			  "invalid '{nxdomains-per-second %d;}'", i);
+	}
+	rrl->nxdomains_per_second = i;
+	rrl->scaled_nxdomains_per_second = rrl->nxdomains_per_second;
+
+	/*
+	 * The all-per-second rate is off by default.
+	 */
+	i = 0;
+	obj = NULL;
+	result = cfg_map_get(map, "all-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE,
+			  "invalid '{all-per-second %d;}'", i);
+		CHECK_RRL(obj, i == 0 || (i >= rrl->responses_per_second*4 &&
+					  i >= rrl->errors_per_second*4 &&
+					  i >= rrl->nxdomains_per_second*4),
+			  "'{all-per-second %d;}' must be"
+			  " at least 4 times responses-per-second,"
+			  "errors_per_second, and nxdomains_per_second",
+			  i);
+	}
+	rrl->all_per_second = i;
+	rrl->scaled_all_per_second = rrl->all_per_second;
+
+	i = 2;
+	obj = NULL;
+	result = cfg_map_get(map, "slip", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i <= DNS_RRL_MAX_SLIP, "invalid '{slip %d;}'", i);
+	}
+	rrl->slip = i;
+	rrl->scaled_slip = rrl->slip;
+
+	i = 15;
+	obj = NULL;
+	result = cfg_map_get(map, "window", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 1 && i <= DNS_RRL_MAX_WINDOW,
+			  "invalid '{window %d;}'", i);
+	}
+	rrl->window = i;
+
+	i = 0;
+	obj = NULL;
+	result = cfg_map_get(map, "qps-scale", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 1, "invalid '{qps-scale %d;}'", i);
+	}
+	rrl->qps_scale = i;
+	rrl->qps = 1.0;
+
+	i = 24;
+	obj = NULL;
+	result = cfg_map_get(map, "IPv4-prefix-length", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 8 && i <= 32,
+			  "invalid '{IPv4-prefix-length %d;}'", i);
+	}
+	rrl->ipv4_prefixlen = i;
+	if (i == 32)
+		rrl->ipv4_mask = 0xffffffff;
+	else
+		rrl->ipv4_mask = htonl(0xffffffff << (32-i));
+
+	i = 56;
+	obj = NULL;
+	result = cfg_map_get(map, "IPv6-prefix-length", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 16 && i <= 128,
+			  "invalid '{IPv6-prefix-length %d;}'", i);
+	}
+	rrl->ipv6_prefixlen = i;
+	memset(rrl->ipv6_mask, 0xff, sizeof(rrl->ipv6_mask));
+	for (j = 0; j < 4; ++j) {
+		if (i == 0) {
+			rrl->ipv6_mask[j] = 0;
+		} else if (i < 32) {
+			rrl->ipv6_mask[j] = htonl(0xffffffff << (32-i));
+			i = 0;
+		} else {
+			rrl->ipv6_mask[j] = 0xffffffff;
+			i -= 32;
+		}
+	}
+
+	obj = NULL;
+	result = cfg_map_get(map, "exempt-clients", &obj);
+	if (result == ISC_R_SUCCESS) {
+		result = cfg_acl_fromconfig(obj, config, ns_g_lctx,
+					    ns_g_aclconfctx, ns_g_mctx,
+					    0, &rrl->exempt);
+		CHECK_RRL(obj, result == ISC_R_SUCCESS,
+			  "invalid %s", "address_match_list");
+	}
+
+	obj = NULL;
+	result = cfg_map_get(map, "log-only", &obj);
+	if (result == ISC_R_SUCCESS && cfg_obj_asboolean(obj))
+		rrl->log_only = ISC_TRUE;
+	else
+		rrl->log_only = ISC_FALSE;
+
+	return (ISC_R_SUCCESS);
+
+ cleanup:
+	dns_rrl_view_destroy(view);
+	return (result);
+}
+
 /*
  * Configure 'view' according to 'vconfig', taking defaults from 'config'
  * where values are missing in 'vconfig'.
@@ -2925,6 +3120,14 @@
 		}
 	}
 
+	obj = NULL;
+	result = ns_config_get(maps, "rate-limit", &obj);
+	if (result == ISC_R_SUCCESS) {
+		result = configure_rrl(view, config, obj);
+		if (result != ISC_R_SUCCESS)
+			goto cleanup;
+	}
+
 	result = ISC_R_SUCCESS;
 
  cleanup:
diff -r -u bin/named/statschannel.c-orig bin/named/statschannel.c
--- bin/named/statschannel.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/named/statschannel.c	2004-01-01 00:00:00.000000000 +0000
@@ -202,6 +202,10 @@
 	SET_NSSTATDESC(updatebadprereq,
 		       "updates rejected due to prerequisite failure",
 		       "UpdateBadPrereq");
+	SET_NSSTATDESC(ratedropped, "responses dropped for rate limits",
+		       "RateDropped");
+	SET_NSSTATDESC(rateslipped, "responses truncated for rate limits",
+		       "RateSlipped");
 	INSIST(i == dns_nsstatscounter_max);
 
 	/* Initialize resolver statistics */
diff -r -u bin/tests/system/README-orig bin/tests/system/README
--- bin/tests/system/README-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/README	2004-01-01 00:00:00.000000000 +0000
@@ -17,6 +17,7 @@
   nsupdate/	Dynamic update and IXFR tests
   resolver/     Regression tests for resolver bugs that have been fixed
 		(not a complete resolver test suite)
+  rrl/		query rate limiting
   rpz/		Tests of response policy zone (RPZ) rewriting
   stub/		Tests of stub zone functionality
   unknown/	Unknown type and class tests
diff -r -u bin/tests/system/conf.sh.in-orig bin/tests/system/conf.sh.in
--- bin/tests/system/conf.sh.in-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/conf.sh.in	2004-01-01 00:00:00.000000000 +0000
@@ -58,7 +58,7 @@
 	 @CHECKDS@ checknames checkzone database dlv dlvauto dlz dlzexternal
          dname dns64 dnssec ecdsa forward glue gost ixfr inline limits
 	 logfileconfig lwresd masterfile masterformat metadata notify
-	 nsupdate pending pkcs11 redirect resolver rndc rpz rrsetorder
+	 nsupdate pending pkcs11 redirect resolver rndc rpz rrl rrsetorder
 	 rsabigexponent sortlist smartsign staticstub stub tkey tsig
 	 tsiggss unknown upforwd verify views xfer xferquota zonechecks"
 
diff -r -u bin/tests/system/rrl/clean.sh-orig bin/tests/system/rrl/clean.sh
--- bin/tests/system/rrl/clean.sh-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/clean.sh	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,22 @@
+# Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+
+# Clean up after rrl tests.
+
+rm -f dig.out*
+rm -f  */named.memstats */named.run ns*/log* */named.rpz */session.key
+rm -f ns3/bl*.db */*.jnl */*.core */*.pid
diff -r -u bin/tests/system/rrl/ns1/named.conf-orig bin/tests/system/rrl/ns1/named.conf
--- bin/tests/system/rrl/ns1/named.conf-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns1/named.conf	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+	query-source address 10.53.0.1;
+	notify-source 10.53.0.1;
+	transfer-source 10.53.0.1;
+	port 5300;
+	session-keyfile "session.key";
+	pid-file "named.pid";
+	listen-on { 10.53.0.1; };
+	listen-on-v6 { none; };
+	notify no;
+};
+
+zone "." {type master; file "root.db";};
diff -r -u bin/tests/system/rrl/ns1/root.db-orig bin/tests/system/rrl/ns1/root.db
--- bin/tests/system/rrl/ns1/root.db-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns1/root.db	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,32 @@
+; Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+$TTL	120
+@		SOA	ns. hostmaster.ns. ( 1 3600 1200 604800 60 )
+@		NS	ns.
+ns.		A	10.53.0.1
+.		A	10.53.0.1
+
+; limit responses from here
+tld2.		NS	ns.tld2.
+ns.tld2.	A	10.53.0.2
+
+; limit recursion to here
+tld3.		NS	ns.tld3.
+ns.tld3.	A	10.53.0.3
+
+; generate SERVFAIL
+tld4.		NS	ns.tld3.
diff -r -u bin/tests/system/rrl/ns2/hints-orig bin/tests/system/rrl/ns2/hints
--- bin/tests/system/rrl/ns2/hints-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/hints	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,19 @@
+; Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+.	0	NS	ns1.
+ns1.	0	A	10.53.0.1
diff -r -u bin/tests/system/rrl/ns2/named.conf-orig bin/tests/system/rrl/ns2/named.conf
--- bin/tests/system/rrl/ns2/named.conf-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/named.conf	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+	query-source address 10.53.0.2;
+	notify-source 10.53.0.2;
+	transfer-source 10.53.0.2;
+	port 5300;
+	session-keyfile "session.key";
+	pid-file "named.pid";
+	listen-on { 10.53.0.2; };
+	listen-on-v6 { none; };
+	notify no;
+
+	rate-limit {
+	    responses-per-second 2;
+	    all-per-second 70;
+	    IPv4-prefix-length 24;
+	    IPv6-prefix-length 64;
+	    slip 3;
+	    /* qps-scale 2; */
+	    exempt-clients { 10.53.0.7; };
+	    window 1;
+	    max-table-size 100;
+	    min-table-size 2;
+	};
+};
+
+/*
+ * These log settings have no effect unless "-g" is removed from ../../start.pl
+ */
+logging {
+	channel debug {
+	    file "log-debug";
+	    print-category yes; print-severity yes; severity debug 10;
+	};
+	channel queries {
+	    file "log-queries";
+	    print-category yes; print-severity yes; severity info;
+	};
+	category rate-limit { debug; queries; };
+	category queries { debug; queries; };
+};
+
+zone "." { type hint; file "hints"; };
+
+zone "tld2."{ type master; file "tld2.db"; };
diff -r -u bin/tests/system/rrl/ns2/tld2.db-orig bin/tests/system/rrl/ns2/tld2.db
--- bin/tests/system/rrl/ns2/tld2.db-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/tld2.db	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,43 @@
+; Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+; rate limit response from this zone
+
+$TTL	120
+@		SOA	tld2.  hostmaster.ns.tld2. ( 1 3600 1200 604800 60 )
+		NS	ns
+		NS	.
+ns		A	10.53.0.2
+
+a1		A	192.168.2.1
+
+*.a2		A	192.168.2.2
+
+; a3 is in tld3
+
+; a4 does not exist to give NXDOMAIN
+
+; a5 for TCP requests
+a5		A	192.168.2.5
+
+; a6 for whitelisted clients
+a6		A	192.168.2.6
+
+; a7 for SERVFAIL
+
+; a8 for all-per-second limit
+$GENERATE 101-180 all$.a8 A 192.168.2.8
diff -r -u bin/tests/system/rrl/ns3/hints-orig bin/tests/system/rrl/ns3/hints
--- bin/tests/system/rrl/ns3/hints-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/hints	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,19 @@
+; Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+.	0	NS	ns1.
+ns1.	0	A	10.53.0.1
diff -r -u bin/tests/system/rrl/ns3/named.conf-orig bin/tests/system/rrl/ns3/named.conf
--- bin/tests/system/rrl/ns3/named.conf-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/named.conf	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+	query-source address 10.53.0.3;
+	notify-source 10.53.0.3;
+	transfer-source 10.53.0.3;
+	port 5300;
+	session-keyfile "session.key";
+	pid-file "named.pid";
+	listen-on { 10.53.0.3; };
+	listen-on-v6 { none; };
+	notify no;
+};
+
+zone "." { type hint; file "hints"; };
+
+zone "tld3."{ type master; file "tld3.db"; };
diff -r -u bin/tests/system/rrl/ns3/tld3.db-orig bin/tests/system/rrl/ns3/tld3.db
--- bin/tests/system/rrl/ns3/tld3.db-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/tld3.db	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,26 @@
+; Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+; rate limit response from this zone
+
+$TTL	120
+@		SOA	tld3.  hostmaster.ns.tld3. ( 1 3600 1200 604800 60 )
+		NS	ns
+		NS	.
+ns		A	10.53.0.3
+
+*.a3		A	192.168.3.3
diff -r -u bin/tests/system/rrl/setup.sh-orig bin/tests/system/rrl/setup.sh
--- bin/tests/system/rrl/setup.sh-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/setup.sh	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+SYSTEMTESTTOP=..
+. $SYSTEMTESTTOP/conf.sh
+. ./clean.sh
+
diff -r -u bin/tests/system/rrl/tests.sh-orig bin/tests/system/rrl/tests.sh
--- bin/tests/system/rrl/tests.sh-orig	2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/tests.sh	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,210 @@
+# Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+# test response rate limiting
+
+SYSTEMTESTTOP=..
+. $SYSTEMTESTTOP/conf.sh
+
+#set -x
+
+ns1=10.53.0.1			    # root, defining the others
+ns2=10.53.0.2			    # test server
+ns3=10.53.0.3			    # secondary test server
+ns7=10.53.0.7			    # whitelisted client
+
+USAGE="$0: [-x]"
+while getopts "x" c; do
+    case $c in
+	x) set -x;;
+	*) echo "$USAGE" 1>&2; exit 1;;
+    esac
+done
+shift `expr $OPTIND - 1 || true`
+if test "$#" -ne 0; then
+    echo "$USAGE" 1>&2
+    exit 1
+fi
+# really quit on control-C
+trap 'exit 1' 1 2 15
+
+
+ret=0
+setret () {
+    ret=1
+    echo "$*"
+}
+
+
+# Wait until soon after the start of a second to make results consistent.
+#   The start of a second credits a rate limit.
+#   This would be far easier in C or by assuming a modern version of perl.
+sec_start () {
+    START=`date`
+    while true; do
+	NOW=`date`
+	if test "$START" != "$NOW"; then
+	    return
+	fi
+	$PERL -e 'select(undef, undef, undef, 0.05)' || true
+    done
+}
+
+
+#   $1=result name  $2=domain name  $3=dig options
+digcmd () {
+    OFILE=$1; shift
+    DIG_DOM=$1; shift
+    ARGS="+noadd +noauth +nosearch +time=1 +tries=1 +ignore $* -p 5300 $DIG_DOM @$ns2"
+    #echo I:dig $ARGS 1>&2
+    START=`date +%y%m%d%H%M.%S`
+    RESULT=`$DIG $ARGS 2>&1 | tee $OFILE=TEMP				\
+	    | sed -n -e  's/^[^;].*	\([^	 ]\{1,\}\)$/\1/p'	\
+		-e 's/;; flags.* tc .*/TC/p'				\
+		-e 's/;; .* status: NXDOMAIN.*/NXDOMAIN/p'		\
+		-e 's/;; .* status: SERVFAIL.*/SERVFAIL/p'		\
+		-e 's/;; connection timed out.*/drop/p'			\
+		-e 's/;; communications error to.*/drop/p'		\
+	    | tr -d '\n'`
+    mv "$OFILE=TEMP" "$OFILE=$RESULT"
+    touch -t $START "$OFILE=$RESULT"
+}
+
+
+#   $1=number of tests  $2=target domain  $3=dig options
+CNT=1
+burst () {
+    BURST_LIMIT=$1; shift
+    BURST_DOM_BASE="$1"; shift
+    while test "$BURST_LIMIT" -ge 1; do
+	if test $CNT -lt 10; then
+	    CNT="0$CNT"
+	fi
+	if test $CNT -lt 100; then
+	    CNT="0$CNT"
+	fi
+	eval BURST_DOM="$BURST_DOM_BASE"
+	FILE="dig.out-$BURST_DOM-$CNT"
+	rm -f $FILE=*
+	digcmd $FILE $BURST_DOM $* &
+	CNT=`expr $CNT + 1`
+	BURST_LIMIT=`expr "$BURST_LIMIT" - 1`
+    done
+}
+
+
+#   $1=domain  $2=IP address  $3=# of IP addresses  $4=TC  $5=drop
+#	$6=NXDOMAIN  $7=SERVFAIL or other errors
+ck_result() {
+    BAD=
+    wait
+    ADDRS=`ls dig.out-$1-*=$2		2>/dev/null	| wc -l | tr -d ' '`
+    TC=`ls dig.out-$1-*=TC		2>/dev/null	| wc -l | tr -d ' '`
+    DROP=`ls dig.out-$1-*=drop		2>/dev/null	| wc -l | tr -d ' '`
+    NXDOMAIN=`ls dig.out-$1-*=NXDOMAIN	2>/dev/null	| wc -l | tr -d ' '`
+    SERVFAIL=`ls dig.out-$1-*=SERVFAIL	2>/dev/null	| wc -l | tr -d ' '`
+    if test $ADDRS -ne "$3"; then
+	setret "I:$ADDRS instead of $3 $2 responses for $1"
+	BAD=yes
+    fi
+    if test $TC -ne "$4"; then
+	setret "I:$TC instead of $4 truncation responses for $1"
+	BAD=yes
+    fi
+    if test $DROP -ne "$5"; then
+	setret "I:$DROP instead of $5 dropped responses for $1"
+	BAD=yes
+    fi
+    if test $NXDOMAIN -ne "$6"; then
+	setret "I:$NXDOMAIN instead of $6 NXDOMAIN responses for $1"
+	BAD=yes
+    fi
+    if test $SERVFAIL -ne "$7"; then
+	setret "I:$SERVFAIL instead of $7 error responses for $1"
+	BAD=yes
+    fi
+    if test -z "$BAD"; then
+	rm -f dig.out-$1-*
+    fi
+}
+
+
+#########
+sec_start
+
+# basic rate limiting
+burst 3 a1.tld2
+# 1 second delay allows an additional response.
+sleep 1
+burst 21 a1.tld2
+# request 30 different qnames to try a wild card
+burst 30 'x$CNT.a2.tld2'
+
+#					IP      TC      drop  NXDOMAIN SERVFAIL
+# check for 24 results
+# including the 1 second delay
+ck_result   a1.tld2	192.168.2.1	3	7	14	0	0
+
+# Check the wild card answers.
+# The parent name of the 30 requests is counted.
+ck_result 'x*.a2.tld2'	192.168.2.2	2	9	19	0	0
+
+
+#########
+sec_start
+
+burst 1 'y$CNT.a3.tld3'; wait; burst 20 'y$CNT.a3.tld3'
+burst 20 'z$CNT.a4.tld2'
+
+# Recursion.
+#   The first answer is counted separately because it is counted against
+#   the rate limit on recursing to the server for a3.tld3.  The remaining 20
+#   are counted as local responses from the cache.
+ck_result 'y*.a3.tld3'	192.168.3.3	3	6	12	0	0
+
+# NXDOMAIN responses are also limited based on the parent name.
+ck_result 'z*.a4.tld2'	x		0	6	12	2	0
+
+
+#########
+sec_start
+
+burst 20 a5.tld2 +tcp
+burst 20 a6.tld2 -b $ns7
+burst 20 a7.tld4
+
+# TCP responses are not rate limited
+ck_result a5.tld2	192.168.2.5	20	0	0	0	0
+
+# whitelisted client is not rate limited
+ck_result a6.tld2	192.168.2.6	20	0	0	0	0
+
+# Errors such as SERVFAIL are rate limited.  The numbers are confusing, because
+#   other rate limiting can be triggered before SERVFAIL is reached.
+ck_result a7.tld4	192.168.2.1	0	5	13	0	2
+
+
+#########
+sec_start
+
+# all-per-second
+CNT=101
+burst 80 'all$CNT.a8.tld2'
+ck_result 'a*.a8.tld2'	192.168.2.8	70	3	7	0	0
+
+
+echo "I:exit status: $ret"
+exit $ret
diff -r -u doc/arm/Bv9ARM-book.xml-orig doc/arm/Bv9ARM-book.xml
--- doc/arm/Bv9ARM-book.xml-orig	2004-01-01 00:00:00.000000000 +0000
+++ doc/arm/Bv9ARM-book.xml	2004-01-01 00:00:00.000000000 +0000
@@ -4803,6 +4803,24 @@
 		    </para>
 		  </entry>
 		</row>
+                <row rowsep="0">
+                  <entry colname="1">
+                    <para><command>rate-limit</command></para>
+                  </entry>
+		  <entry colname="2">
+		    <para>
+		      The start, periodic, and final notices of rate limiting
+		      of a stream of responses are logged at
+		      <command>info</command> severity in this category.
+		      Various internal performance data such as expansions
+		      of the table is logged debug 1 level and higher.
+		      Rate limiting of individual requests
+		      is logged in the <command>queries</command> category
+		      and can be controlled with the
+		      <command>querylog</command> option.
+		    </para>
+		  </entry>
+		</row>
 	      </tbody>
 	    </tgroup>
 	  </informaltable>
@@ -5334,6 +5352,21 @@
     <optional> resolver-query-timeout <replaceable>number</replaceable> ; </optional>
     <optional> deny-answer-addresses { <replaceable>address_match_list</replaceable> } <optional> except-from { <replaceable>namelist</replaceable> } </optional>;</optional>
     <optional> deny-answer-aliases { <replaceable>namelist</replaceable> } <optional> except-from { <replaceable>namelist</replaceable> } </optional>;</optional>
+    <optional> rate-limit {
+	<optional> responses-per-second <replaceable>number</replaceable> ; </optional>
+	<optional> errors-per-second <replaceable>number</replaceable> ; </optional>
+	<optional> nxdomains-per-second <replaceable>number</replaceable> ; </optional>
+	<optional> all-per-second <replaceable>number</replaceable> ; </optional>
+	<optional> window <replaceable>number</replaceable> ; </optional>
+	<optional> log-only <replaceable>yes_or_no</replaceable> ; </optional>
+	<optional> qps-scale <replaceable>number</replaceable> ; </optional>
+	<optional> IPv4-prefix-length <replaceable>number</replaceable> ; </optional>
+	<optional> IPv6-prefix-length <replaceable>number</replaceable> ; </optional>
+	<optional> slip <replaceable>number</replaceable> ; </optional>
+	<optional> exempt-clients  { <replaceable>address_match_list</replaceable> } ; </optional>
+	<optional> max-table-size <replaceable>number</replaceable> ; </optional>
+	<optional> min-table-size <replaceable>number</replaceable> ; </optional>
+      } ; </optional>
     <optional> response-policy { <replaceable>zone_name</replaceable>
 	<optional> policy given | disabled | passthru | nxdomain | nodata | cname <replaceable>domain</replaceable> </optional>
 	<optional> recursive-only <replaceable>yes_or_no</replaceable> </optional> <optional> max-policy-ttl <replaceable>number</replaceable> </optional> ;
@@ -9737,6 +9770,204 @@
 48.zz.2.2001.rpz-nsip       CNAME   .
 </programlisting>
         </sect3>
+
+	<sect3>
+	  <title>Rate Limiting</title>
+	  <para>
+	    Excessive essentially identical UDP <emphasis>responses</emphasis>
+	    can be discarded by configuring a
+	    <command>rate-limit</command> clause in an
+	    <command>options</command> statement.
+	    This mechanism keeps BIND 9 from being used
+	    in amplifying reflection denial of service attacks
+	    as well as partially protecting BIND 9 itself from
+	    some denial of service attacks.
+	    Very short truncated responses can be sent to provide
+	    rate-limited responses to legitimate
+	    clients within a range of attacked and forged IP addresses,
+	    Legitimate clients react to truncated response by retrying
+	    with TCP.
+	  </para>
+
+	  <para>
+	    Rate limiting works by setting
+	    <command>responses-per-second</command>
+	    to a number of repetitions per second for responses for a given name
+	    and record type to a DNS client.
+	  </para>
+
+	  <para>
+	    <command>Responses-per-second</command> is a limit on
+	    identical responses instead of a limit on all responses or
+	    even all responses to a single client.
+	    10 identical responses per second is a generous limit except perhaps
+	    when many clients are using a single IP address via network
+	    address translation (NAT).
+	    The default limit of zero specifies an unbounded limit to turn off
+	    rate-limiting in a view or to only rate-limit NXDOMAIN or other
+	    errors.
+	  </para>
+
+	  <para>
+	    The notion of "identical responses"
+	    and "single DNS client" cannot be simplistic.
+	    All responses to a CIDR block with prefix
+	    length specified with <command>IPv4-prefix-length</command>
+	    (default 24) or <command>IPv6-prefix-length</command>
+	    (default 56) are assumed to come from a single DNS client.
+	    Requests for a name that result in DNS NXDOMAIN
+	    errors are considered identical.
+	    This controls some attacks using random names, but
+	    accommodates servers that expect many legitimate NXDOMAIN responses
+	    such as anti-spam blacklists.
+	    By default the limit on NXDOMAIN errors is the same as the
+	    <command>responses-per-second</command> value,
+	    but it can be set separately with
+	    <command>nxdomains-per-second</command>.
+	    All requests for all names or types that result in DNS errors
+	    such as SERVFAIL and FORMERR (but not NXDOMAIN) are considered
+	    identical.
+	    This controls attacks using invalid requests or distant,
+	    broken authoritative servers.
+	    By default the limit on errors is the same as the
+	    <command>responses-per-second</command> value,
+	    but it can be set separately with
+	    <command>errors-per-second</command>.
+	  </para>
+
+	  <para>
+	    Rate limiting uses a "credit" or "token bucket" scheme.
+	    Each identical response has a conceptual account
+	    that is given <command>responses-per-second</command>,
+	    <command>errors-per-second</command>, and
+	    <command>nxdomains-per-second</command> credits every second.
+	    A DNS request triggering some desired response debits
+	    the account by one.
+	    Responses are not sent while the account is negative.
+	    The account cannot become more positive than
+	    the per-second limit
+	    or more negative than <command>window</command>
+	    times the per-second limit.
+	    A DNS client that sends requests that are not
+	    answered can penalized for up to <command>window</command> seconds
+	    (default 15).
+	  </para>
+
+	  <para>
+	    Responses generated from local wildcards are counted and limited
+	    as if they were for the parent domain name.
+	    This prevents flooding by requesting random.wild.example.com.
+	    For similar reasons, NXDOMAIN responses are counted and rate
+	    limited their owner name, the nearest valid domain name to the
+	    query name with an SOA record.
+	  </para>
+
+	  <para>
+	    Many attacks using DNS involve UDP requests with forged source
+	    addresses.
+	    Rate limiting prevents the use of BIND 9 to flood a network
+	    with responses to requests with forged source addresses,
+	    but could let a third party block responses to legitimate requests.
+	    There is a mechanism that can answer some legitimate
+	    requests from a client whose address is being forged in a flood.
+	    Setting <command>slip</command> to 2 (its default) causes every
+	    other UDP request to be answered with a small response
+	    claiming that the response would have been truncated.
+	    The small size and relative infrequency of the response make
+	    it unattractive for abuse of third parties.
+	    <command>slip</command> must be between 0 and 10.
+	    0 "slips" or sends no rate limiting truncated responses.
+	    Some error responses cannot be replaced with responses
+	    with the <literal>TC</literal> flag, and so are instead
+	    leaked at the <command>slip</command> rate.
+	  </para>
+
+	  <para>
+	    When the approximate query per second rate exceeds
+	    the <command>qps-scale</command> value,
+	    the <command>responses-per-second</command>,
+	    <command>errors-per-second</command>,
+	    <command>nxdomains-per-second</command>,
+	    and <command>slip</command> values are reduced by the
+	    ratio of the current rate to the <command>qps-scale</command> value.
+	    This feature can tighten defenses during attacks.
+	    For example, with
+	    <command>qps-scale 250; responses-per-second 20;</command> and
+	    a total query rate of 1000 queries/second for all queries from
+	    all DNS clients including via TCP,
+	    then the effective responses/second limit changes to
+	    (250/1000)*20 or 5.
+	    The limits for IP addresses using TCP are not reduced.
+	    Responses sent via TCP are not subject to rate limits
+	    but are counted to approximate the query per second rate.
+	  </para>
+
+	  <para>
+	    Communities of DNS clients can be given their own parameters or no
+	    rate limiting by putting
+	    <command>rate-limit</command> statements in <command>view</command>
+	    statements instead of the global <command>option</command>
+	    statement.
+	    A <command>rate-limit</command> statement in a view replaces
+	    instead of being merged with a <command>rate-limit</command>
+	    statement among the main options.
+	    DNS clients within a view can be exempted from rate limits
+	    with the <command>exempt-clients</command> clause.
+	  </para>
+
+	  <para>
+	    UDP responses of all kinds can be limited with the
+	    <command>all-per-second</command> phrase.
+	    This rate limiting is similar to the rate limiting offered by
+	    firewalls.  When performed in a DNS server it is inferior to
+	    the other <command>rate-limit</command> forms, because it ignores
+	    the contents of responses to a block of IP addresses.
+	    The rate limiting provided by
+	    <command>responses-per-second</command>,
+	    <command>errors-per-second</command>, and
+	    <command>nxdomains-per-second</command> on a DNS server
+	    is often invisible to the victim of a DNS reflection attack.
+	    Unless the forged requests of the attack are the same as the
+	    legitimate requests of the victim, the victim's requests are
+	    not affected.
+	    A <command>all-per-second</command> limit must be
+	    at least 4 times as large as the other limits,
+	    because single DNS clients often send bursts of legitimate
+	    requests.
+	    For example, the receipt of a single mail message can prompt
+	    requests from an SMTP server for NS, PTR, A, and AAAA records
+	    as the incoming SMTP/TCP/IP connection is considered.
+	    The SMTP server can need additional NS, A, AAAA, MX, TXT, and SPF
+	    records as it considers the STMP <command>Mail From</command>
+	    command.
+	  </para>
+
+	  <para>
+	    The maximum size of the table used to track requests and so
+	    rate limit responses is set with <command>max-table-size</command>.
+	    Each entry in the table is between 40 and 80 bytes.
+	    The default of 10,000 is suitable for a server receiving
+	    5000 DNS requests/second.
+	    10,000 entries require about 1 megabyte.
+	    To reduce cold start costs including those in growing the
+	    table, <command>min-table-size</command> (default 1000)
+	    can set the minimum table size.
+	    Enable logging to monitor expansions of the table and inform
+	    non-default choices for the initial and maximum table size.
+	  </para>
+
+	  <para>
+	    Use <command>log-only yes</command> to test rate limiting parameters
+	    without actually dropping any requests.
+	  </para>
+
+	  <para>
+	    Responses dropped by rate limits are included in the
+	    <command>RateDropped</command> and <command>QryDropped</command>
+	    statistics.
+	    Responses that truncated by rate limits are included in
+	    <command>RateSlipped</command> and <command>RespTruncated</command>.
+	</sect3>
       </sect2>
 
       <sect2 id="server_statement_grammar">
@@ -14385,6 +14616,32 @@
 		      </para>
 		    </entry>
 		  </row>
+		  <row rowsep="0">
+		    <entry colname="1">
+		      <para><command>RateDropped</command></para>
+		    </entry>
+		    <entry colname="2">
+		      <para><command></command></para>
+		    </entry>
+		    <entry colname="3">
+		      <para>
+			Responses dropped by rate limits.
+		      </para>
+		    </entry>
+		  </row>
+		  <row rowsep="0">
+		    <entry colname="1">
+		      <para><command>RateSlipped</command></para>
+		    </entry>
+		    <entry colname="2">
+		      <para><command></command></para>
+		    </entry>
+		    <entry colname="3">
+		      <para>
+			Responses truncated by rate limits.
+		      </para>
+		    </entry>
+		  </row>
 		</tbody>
               </tgroup>
             </informaltable>
diff -r -u lib/dns/Makefile.in-orig lib/dns/Makefile.in
--- lib/dns/Makefile.in-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/Makefile.in	2004-01-01 00:00:00.000000000 +0000
@@ -66,8 +66,8 @@
 		portlist.@O@ private.@O@ \
 		rbt.@O@ rbtdb.@O@ rbtdb64.@O@ rcode.@O@ rdata.@O@ \
 		rdatalist.@O@ rdataset.@O@ rdatasetiter.@O@ rdataslab.@O@ \
-		request.@O@ resolver.@O@ result.@O@ rootns.@O@ rpz.@O@ \
-		rriterator.@O@ sdb.@O@ \
+		request.@O@ resolver.@O@ result.@O@ rootns.@O@ \
+		rpz.@O@ rrl.@O@ rriterator.@O@ sdb.@O@ \
 		sdlz.@O@ soa.@O@ ssu.@O@ ssu_external.@O@ \
 		stats.@O@ tcpmsg.@O@ time.@O@ timer.@O@ tkey.@O@ \
 		tsec.@O@ tsig.@O@ ttl.@O@ update.@O@ validator.@O@ \
@@ -93,7 +93,7 @@
 		name.c ncache.c nsec.c nsec3.c order.c peer.c portlist.c \
 		rbt.c rbtdb.c rbtdb64.c rcode.c rdata.c rdatalist.c \
 		rdataset.c rdatasetiter.c rdataslab.c request.c \
-		resolver.c result.c rootns.c rpz.c rriterator.c \
+		resolver.c result.c rootns.c rpz.c rrl.c rriterator.c \
 		sdb.c sdlz.c soa.c ssu.c ssu_external.c \
 		stats.c tcpmsg.c time.c timer.c tkey.c \
 		tsec.c tsig.c ttl.c update.c validator.c \
diff -r -u lib/dns/include/dns/log.h-orig lib/dns/include/dns/log.h
--- lib/dns/include/dns/log.h-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/log.h	2004-01-01 00:00:00.000000000 +0000
@@ -43,6 +43,7 @@
 #define DNS_LOGCATEGORY_DELEGATION_ONLY	(&dns_categories[10])
 #define DNS_LOGCATEGORY_EDNS_DISABLED	(&dns_categories[11])
 #define DNS_LOGCATEGORY_RPZ		(&dns_categories[12])
+#define DNS_LOGCATEGORY_RRL		(&dns_categories[13])
 
 /* Backwards compatibility. */
 #define DNS_LOGCATEGORY_GENERAL		ISC_LOGCATEGORY_GENERAL
diff -r -u lib/dns/include/dns/rrl.h-orig lib/dns/include/dns/rrl.h
--- lib/dns/include/dns/rrl.h-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/rrl.h	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+#ifndef DNS_RRL_H
+#define DNS_RRL_H 1
+
+/*
+ * Rate limit DNS responses.
+ */
+
+#include <isc/lang.h>
+
+#include <dns/fixedname.h>
+#include <dns/rdata.h>
+#include <dns/types.h>
+
+ISC_LANG_BEGINDECLS
+
+
+/*
+ * Memory allocation or other failures.
+ */
+#define DNS_RRL_LOG_FAIL	ISC_LOG_WARNING
+/*
+ * dropped or slipped responses.
+ */
+#define DNS_RRL_LOG_DROP	ISC_LOG_INFO
+/*
+ * Major events in dropping or slipping.
+ */
+#define DNS_RRL_LOG_DEBUG1	ISC_LOG_DEBUG(3)
+/*
+ * Limit computations.
+ */
+#define DNS_RRL_LOG_DEBUG2	ISC_LOG_DEBUG(4)
+/*
+ * Less interesting.
+ */
+#define DNS_RRL_LOG_DEBUG3	ISC_LOG_DEBUG(9)
+
+
+#define DNS_RRL_LOG_ERR_LEN	64
+#define DNS_RRL_LOG_BUF_LEN	(sizeof("would continue limiting") +	\
+				 DNS_RRL_LOG_ERR_LEN +			\
+				 sizeof(" responses to ") +		\
+				 ISC_NETADDR_FORMATSIZE +		\
+				 sizeof("/128 for IN ") +		\
+				 DNS_RDATATYPE_FORMATSIZE +		\
+				 DNS_NAME_FORMATSIZE)
+
+
+typedef struct dns_rrl_hash dns_rrl_hash_t;
+
+/*
+ * Response types.
+ */
+typedef enum {
+	DNS_RRL_RTYPE_FREE,
+	DNS_RRL_RTYPE_QUERY,
+	DNS_RRL_RTYPE_NXDOMAIN,
+	DNS_RRL_RTYPE_ERROR,
+	DNS_RRL_RTYPE_ALL,
+	DNS_RRL_RTYPE_TCP,
+} dns_rrl_rtype_t;
+
+/*
+ * A rate limit bucket key.
+ * This should be small to limit the total size of the database.
+ */
+typedef struct dns_rrl_key dns_rrl_key_t;
+struct dns_rrl_key {
+	isc_uint32_t	    ip[4];
+	isc_uint32_t	    qname_hash;
+	dns_rdatatype_t	    qtype;
+	dns_rrl_rtype_t	    rtype   :3;
+	isc_boolean_t	    qclass  :3;
+	isc_boolean_t	    ipv6    :1;
+};
+
+/*
+ * A rate-limit entry.
+ * This should be small to limit the total size of the database.
+ * With gcc on ARM, the key should have __attribute((__packed__)) to
+ *	avoid padding to a multiple of 8 bytes.
+ */
+typedef struct dns_rrl_entry dns_rrl_entry_t;
+typedef ISC_LIST(dns_rrl_entry_t) dns_rrl_bin_t;
+struct dns_rrl_entry {
+	ISC_LINK(dns_rrl_entry_t) lru;
+	ISC_LINK(dns_rrl_entry_t) hlink;
+	dns_rrl_bin_t	*bin;
+	isc_stdtime_t	last_used;
+	isc_int32_t	responses;
+# define DNS_RRL_MAX_WINDOW	600
+# define DNS_RRL_MAX_RATE	(ISC_INT32_MAX / DNS_RRL_MAX_WINDOW)
+	dns_rrl_key_t	key;
+	unsigned int	slip_cnt    :4;
+# define DNS_RRL_MAX_SLIP	10
+	unsigned int	log_secs    :10;
+# define DNS_RRL_MAX_LOG_SECS	600
+# define DNS_RRL_STOP_LOG_SECS	60
+	isc_boolean_t	logged	    :1;
+	unsigned int	log_qname   :8;
+# define DNS_RRL_NUM_QNAMES	256
+};
+
+/*
+ * A hash table of rate-limit entries.
+ */
+struct dns_rrl_hash {
+	isc_stdtime_t	check_time;
+	int		length;
+	dns_rrl_bin_t	bins[1];
+};
+
+/*
+ * A block of rate-limit entries.
+ */
+typedef struct dns_rrl_block dns_rrl_block_t;
+struct dns_rrl_block {
+	ISC_LINK(dns_rrl_block_t) link;
+	int		size;
+	dns_rrl_entry_t	entries[1];
+};
+
+/*
+ * A rate limited qname buffers.
+ */
+typedef struct dns_rrl_qname_buf dns_rrl_qname_buf_t;
+struct dns_rrl_qname_buf {
+	ISC_LINK(dns_rrl_qname_buf_t) link;
+	const dns_rrl_entry_t *e;
+	unsigned int	    index;
+	dns_fixedname_t	    qname;
+};
+
+/*
+ * Per-view query rate limit parameters and a pointer to database.
+ */
+typedef struct dns_rrl dns_rrl_t;
+struct dns_rrl {
+	isc_mutex_t	lock;
+	isc_mem_t	*mctx;
+
+	isc_boolean_t	log_only;
+	int		responses_per_second;
+	int		errors_per_second;
+	int		nxdomains_per_second;
+	int		all_per_second;
+	int		window;
+	int		slip;
+	double		qps_scale;
+	int		max_entries;
+
+	dns_acl_t	*exempt;
+
+	int		num_entries;
+
+	int		qps_responses;
+	isc_stdtime_t	qps_time;
+	double		qps;
+	int		scaled_responses_per_second;
+	int		scaled_errors_per_second;
+	int		scaled_nxdomains_per_second;
+	int		scaled_all_per_second;
+	int		scaled_slip;
+
+	isc_stdtime_t	prune_time;
+
+	unsigned int	probes;
+	unsigned int	searches;
+
+	ISC_LIST(dns_rrl_block_t) blocks;
+	ISC_LIST(dns_rrl_entry_t) lru;
+
+	dns_rrl_hash_t	*hash;
+	dns_rrl_hash_t	*old_hash;
+
+	int		ipv4_prefixlen;
+	isc_uint32_t	ipv4_mask;
+	int		ipv6_prefixlen;
+	isc_uint32_t	ipv6_mask[4];
+
+	dns_rrl_entry_t	*log_ended;
+	ISC_LIST(dns_rrl_qname_buf_t) qname_free;
+	int		num_qnames;
+	dns_rrl_qname_buf_t *qnames[DNS_RRL_NUM_QNAMES];
+};
+
+typedef enum {
+	DNS_RRL_RESULT_OK,
+	DNS_RRL_RESULT_DROP,
+	DNS_RRL_RESULT_SLIP,
+} dns_rrl_result_t;
+
+dns_rrl_result_t
+dns_rrl(dns_view_t *view,
+	const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
+	dns_rdataclass_t rdclass, dns_rdatatype_t qtype,
+	dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now,
+	isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len);
+
+void
+dns_rrl_view_destroy(dns_view_t *view);
+
+isc_result_t
+dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries);
+
+ISC_LANG_ENDDECLS
+
+#endif /* DNS_RRL_H */
diff -r -u lib/dns/include/dns/view.h-orig lib/dns/include/dns/view.h
--- lib/dns/include/dns/view.h-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/view.h	2004-01-01 00:00:00.000000000 +0000
@@ -73,6 +73,7 @@
 
 #include <dns/acl.h>
 #include <dns/fixedname.h>
+#include <dns/rrl.h>
 #include <dns/rdatastruct.h>
 #include <dns/rpz.h>
 #include <dns/types.h>
@@ -142,6 +143,7 @@
 	dns_rbt_t *			answeracl_exclude;
 	dns_rbt_t *			denyanswernames;
 	dns_rbt_t *			answernames_exclude;
+	dns_rrl_t *			rrl;
 	isc_boolean_t			provideixfr;
 	isc_boolean_t			requestnsid;
 	dns_ttl_t			maxcachettl;
diff -r -u lib/dns/log.c-orig lib/dns/log.c
--- lib/dns/log.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/log.c	2004-01-01 00:00:00.000000000 +0000
@@ -45,6 +45,7 @@
 	{ "delegation-only", 0 },
 	{ "edns-disabled", 0 },
 	{ "rpz",	0 },
+	{ "rate-limit",	0 },
 	{ NULL, 	0 }
 };
 
diff -r -u lib/dns/rrl.c-orig lib/dns/rrl.c
--- lib/dns/rrl.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/rrl.c	2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,1242 @@
+/*
+ * Copyright (C) 2012  Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+/*! \file */
+
+/*
+ * Rate limit DNS responses.
+ */
+
+/* #define ISC_LIST_CHECKINIT */
+
+#include <config.h>
+#include <isc/mem.h>
+#include <isc/net.h>
+#include <isc/netaddr.h>
+
+#include <dns/result.h>
+#include <dns/rcode.h>
+#include <dns/rdatatype.h>
+#include <dns/rdataclass.h>
+#include <dns/log.h>
+#include <dns/rrl.h>
+#include <dns/view.h>
+
+
+static void
+log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+	char *log_buf, unsigned int log_buf_len);
+
+
+/*
+ * Get a modulus for a hash function that is tolerably likely to be
+ * relatively prime to most inputs.  Of course, we get a prime for for initial
+ * values not larger than the square of the last prime.  We often get a prime
+ * after that.
+ * This works well in practice for hash tables up to at least 100
+ * times the square of the last prime and better than a multiplicative hash.
+ */
+static int
+hash_divisor(unsigned int initial) {
+	static isc_uint16_t primes[] = {
+		  3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,
+		 43,  47,  53,  59,  61,  67,  71,  73,  79,  83,  89,  97,
+#if 0
+		101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
+		163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
+		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
+		293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
+		373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
+		443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
+		521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
+		601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
+		673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
+		757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
+		839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
+		929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
+#endif
+	};
+	int divisions, tries;
+	unsigned int result;
+	isc_uint16_t *pp, p;
+
+	result = initial;
+
+	if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
+		pp = primes;
+		while (*pp < result)
+			++pp;
+		return (*pp);
+	}
+
+	if ((result & 1) == 0)
+		++result;
+
+	divisions = 0;
+	tries = 1;
+	pp = primes;
+	do {
+		p = *pp++;
+		++divisions;
+		if ((result % p) == 0) {
+			++tries;
+			result += 2;
+			pp = primes;
+		}
+	} while (pp < &primes[sizeof(primes)/sizeof(primes[0])]);
+
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
+			      "%d hash_divisor() divisions in %d tries"
+			      " to get %d from %d",
+			      divisions, tries, result, initial);
+
+	return (result);
+}
+
+/*
+ * Convert a timestamp to a number of seconds in the past.
+ */
+static inline int
+delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
+	int delta;
+
+	delta = now - ts;
+	if (delta >= 0)
+		return (delta);
+
+	/*
+	 * The timestamp is in the future.  That future might result from
+	 * re-ordered requests, because we use timestamps on requests
+	 * instead of consulting a clock.  Timestamps in the distant future are
+	 * assumed to result from clock changes.  When the clock changes to
+	 * the past, make existing timestamps appear to be in the past.
+	 */
+	if (delta < -5)
+		return (now);
+	return (0);
+}
+
+static isc_result_t
+add_rrl_entries(dns_rrl_t *rrl, int new) {
+	unsigned int bsize;
+	dns_rrl_block_t *b;
+	dns_rrl_entry_t *e;
+	double rate;
+	int i;
+
+	if (rrl->num_entries+new >= rrl->max_entries && rrl->max_entries != 0) {
+		if (rrl->num_entries >= rrl->max_entries)
+			return (ISC_R_SUCCESS);
+		new = rrl->max_entries - rrl->num_entries;
+		if (new <= 0)
+			return (ISC_R_NOMEMORY);
+	}
+
+	/*
+	 * Try to log expansions so that the user can tune max-table-size
+	 * and min-table-size.
+	 */
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
+	    rrl->hash != NULL ) {
+		rate = rrl->probes;
+		if (rrl->searches != 0)
+			rate /= rrl->searches;
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+			      "increase from %d to %d RRL entries with"
+			      " %d bins; average search length %.1f",
+			      rrl->num_entries, rrl->num_entries+new,
+			      rrl->hash->length, rate);
+	}
+
+	bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t);
+	b = isc_mem_get(rrl->mctx, bsize);
+	if (b == NULL) {
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
+			      "isc_mem_get(%d) failed for RRL entries",
+			      bsize);
+		return (ISC_R_NOMEMORY);
+	}
+	memset(b, 0, bsize);
+	b->size = bsize;
+
+	e = b->entries;
+	rrl->log_ended = e;
+	for (i = 0; i < new; ++i, ++e) {
+		ISC_LINK_INIT(e, hlink);
+		ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
+	}
+	rrl->num_entries += new;
+	ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
+
+	return (ISC_R_SUCCESS);
+}
+
+static inline dns_rrl_bin_t *
+get_rrl_bin(dns_rrl_hash_t *hash, unsigned int hval) {
+	return (&hash->bins[hval % hash->length]);
+}
+
+static void
+free_old_hash(dns_rrl_t *rrl) {
+	dns_rrl_hash_t *old_hash;
+	dns_rrl_bin_t *old_bin;
+	dns_rrl_entry_t *e;
+
+	old_hash = rrl->old_hash;
+	for (old_bin = &old_hash->bins[0];
+	     old_bin < &old_hash->bins[old_hash->length];
+	     ++old_bin) {
+		while ((e = ISC_LIST_HEAD(*old_bin)) != NULL) {
+			ISC_LIST_UNLINK(*e->bin, e, hlink);
+			e->bin = NULL;
+		}
+	}
+
+	isc_mem_put(rrl->mctx, old_hash,
+		    sizeof(*old_hash)
+		    + (old_hash->length-1)*sizeof(old_hash->bins[0]));
+	rrl->old_hash = NULL;
+}
+
+static isc_result_t
+expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
+	dns_rrl_hash_t *hash;
+	int old_bins, new_bins, hsize;
+	double rate;
+
+	if (rrl->old_hash != NULL)
+		free_old_hash(rrl);
+
+	/*
+	 * Most searches fail and so go to the end of the chain.
+	 * Use a small hash table load factor.
+	 */
+	old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
+	new_bins = old_bins/8 + old_bins;
+	if (new_bins < rrl->num_entries)
+		new_bins = rrl->num_entries;
+	new_bins = hash_divisor(new_bins);
+
+	hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
+	hash = isc_mem_get(rrl->mctx, hsize);
+	if (hash == NULL) {
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
+			      "isc_mem_get(%d) failed for"
+			      " RRL hash table",
+			      hsize);
+		return (ISC_R_NOMEMORY);
+	}
+	memset(hash, 0, hsize);
+	hash->length = new_bins;
+
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1) && old_bins != 0) {
+		rate = rrl->probes;
+		if (rrl->searches != 0)
+			rate /= rrl->searches;
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST,
+			      DNS_RRL_LOG_DEBUG1,
+			      "increase from %d to %d RRL bins for"
+			      " %d entries; average search length %.1f",
+			      old_bins, new_bins, rrl->num_entries, rate);
+	}
+
+	rrl->old_hash = rrl->hash;
+	if (rrl->old_hash != NULL)
+		rrl->old_hash->check_time = now;
+	rrl->hash = hash;
+
+	return (ISC_R_SUCCESS);
+}
+
+static void
+rrl_entry_ref(dns_rrl_t *rrl, dns_rrl_entry_t *e, dns_rrl_bin_t *new_bin,
+	      int probes, isc_stdtime_t now)
+{
+	/*
+	 * Make the entry most recently used.
+	 */
+	if (ISC_LIST_HEAD(rrl->lru) != e) {
+		if (e == rrl->log_ended) {
+			if (e->lru.next != NULL)
+				rrl->log_ended = e->lru.next;
+			else
+				rrl->log_ended = e->lru.prev;
+		}
+		ISC_LIST_UNLINK(rrl->lru, e, lru);
+		ISC_LIST_PREPEND(rrl->lru, e, lru);
+	}
+
+	/*
+	 * Move the entry to the head of its hash chain.
+	 */
+	if (ISC_LIST_HEAD(*new_bin) != e) {
+		if (e->bin != NULL)
+			ISC_LIST_UNLINK(*e->bin, e, hlink);
+		ISC_LIST_PREPEND(*new_bin, e, hlink);
+		e->bin = new_bin;
+	}
+
+	/*
+	 * Expand the hash table if it is time and necessary.
+	 * This will leave the newly referenced entry in a chain in the
+	 * old hash table.  It will migrate to the new hash table the next
+	 * time it is used or be cut loose when the old hash table is destroyed.
+	 */
+	rrl->probes += probes;
+	++rrl->searches;
+	if (rrl->searches > 100 &&
+	    delta_rrl_time(rrl->hash->check_time, now) >= 10) {
+		if (rrl->probes/rrl->searches > 2)
+			expand_rrl_hash(rrl, now);
+		rrl->hash->check_time = now;
+		rrl->probes = 0;
+		rrl->searches = 0;
+	}
+}
+
+static inline isc_boolean_t
+rrl_key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
+	return (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0 ? ISC_TRUE : ISC_FALSE);
+}
+
+/*
+ * Construct the database key.
+ * Use a hash of the DNS query name to save space in the database.
+ * Collisions result in legitimate rate limiting responses for one
+ * query name also limiting responses for other names to the
+ * same client.  This is rare and benign enough given the large
+ * space costs compared to keeping the entire name in the database
+ * entry or the time costs of dynamic allocation.
+ */
+static isc_uint32_t
+make_key(dns_rrl_t *rrl, dns_rrl_key_t *key,
+	 const isc_sockaddr_t *client_addr,
+	 dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
+	 dns_rrl_rtype_t rtype)
+{
+	isc_uint32_t hval;
+	int i;
+
+	memset(key, 0, sizeof(*key));
+
+	key->rtype = rtype;
+	hval = rtype;
+	if (rtype == DNS_RRL_RTYPE_QUERY ||
+	    rtype == DNS_RRL_RTYPE_NXDOMAIN) {
+		/*
+		 * Map dns_rdataclass_reserved0 = 0 -> 2
+		 *	dns_rdataclass_in = 1	    -> 3
+		 *	dns_rdataclass_chaos = 3    -> 5
+		 *	dns_rdataclass_hs = 4	    -> 6
+		 *	dns_rdataclass_none = 254   -> 0
+		 *	dns_rdataclass_any = 255    -> 1
+		 * and trust that there will never be significant changes.
+		 */
+		key->qclass = qclass+2;
+		key->qtype = qtype;
+		hval += qtype<<8;
+	}
+
+	if (qname != NULL && qname->labels != 0) {
+		/*
+		 * Ignore the first label of wildcards.
+		 */
+		if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
+		    (i = dns_name_countlabels(qname)) > 1) {
+			dns_fixedname_t suffixf;
+			dns_name_t *suffix;
+
+			dns_fixedname_init(&suffixf);
+			suffix = dns_fixedname_name(&suffixf);
+			dns_name_split(qname, i-1, NULL, suffix);
+			key->qname_hash = dns_name_hashbylabel(suffix,
+							ISC_FALSE);
+		} else {
+			key->qname_hash = dns_name_hashbylabel(qname,
+							ISC_FALSE);
+		}
+		hval += key->qname_hash;
+	}
+
+	switch (client_addr->type.sa.sa_family) {
+	case AF_INET:
+		key->ip[3] = (client_addr->type.sin.sin_addr.s_addr &
+			      rrl->ipv4_mask);
+		hval = (hval>>31) + (hval<<1) + key->ip[3];
+		break;
+	case AF_INET6:
+		key->ipv6 = ISC_TRUE;
+		memcpy(key->ip, &client_addr->type.sin6.sin6_addr,
+		       sizeof(key->ip));
+		for (i = 0; i < 4; ++i) {
+			key->ip[i] &= rrl->ipv6_mask[i];
+			hval = (hval>>31) + (hval<<1) + key->ip[i];
+		}
+		break;
+	}
+
+	return (hval);
+}
+
+static inline int
+response_balance(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
+	int balance;
+
+	balance = e->responses;
+	if (balance < 0)
+		switch (e->key.rtype) {
+		case DNS_RRL_RTYPE_QUERY:
+			balance += age * rrl->responses_per_second;
+			break;
+		case DNS_RRL_RTYPE_NXDOMAIN:
+			balance += age * rrl->nxdomains_per_second;
+			break;
+		case DNS_RRL_RTYPE_ERROR:
+			balance += age * rrl->errors_per_second;
+			break;
+		case DNS_RRL_RTYPE_ALL:
+			balance += age * rrl->all_per_second;
+			break;
+		case DNS_RRL_RTYPE_TCP:
+			balance += age;
+			break;
+		default:
+			INSIST(0);
+	}
+	return (balance);
+}
+
+/*
+ * Search for an entry for a response and optionally create it.
+ */
+static dns_rrl_entry_t *
+get_rrl_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
+	      dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
+	      dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
+	      char *log_buf, unsigned int log_buf_len)
+{
+	dns_rrl_key_t key;
+	isc_uint32_t hval;
+	dns_rrl_hash_t *hash, *old_hash;
+	dns_rrl_entry_t *e;
+	dns_rrl_bin_t *new_bin, *old_bin;
+	int probes, age;
+
+	hval = make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
+
+	/*
+	 * Look for the entry in the current hash table.
+	 */
+	hash = rrl->hash;
+	new_bin = get_rrl_bin(hash, hval);
+	for (e = ISC_LIST_HEAD(*new_bin), probes = 1;
+	     e != NULL;
+	     e = ISC_LIST_NEXT(e, hlink), ++probes) {
+		if (rrl_key_cmp(&e->key, &key)) {
+			rrl_entry_ref(rrl, e, new_bin, probes, now);
+			return (e);
+		}
+	}
+
+	/*
+	 * Look in the old hash table if we did not find the entry.
+	 */
+	old_hash = rrl->old_hash;
+	if (old_hash != NULL) {
+		old_bin = get_rrl_bin(old_hash, hval);
+		for (e = ISC_LIST_HEAD(*old_bin);
+		     e != NULL;
+		     e = ISC_LIST_NEXT(e, hlink)) {
+			if (rrl_key_cmp(&e->key, &key)) {
+				rrl_entry_ref(rrl, e, new_bin, probes, now);
+				return (e);
+			}
+		}
+
+		/*
+		 * Discard prevous hash table when its entries are all old.
+		 */
+		if (delta_rrl_time(old_hash->check_time, now) > rrl->window)
+			free_old_hash(rrl);
+	}
+
+	if (!create)
+		return (NULL);
+
+	/*
+	 * The block does not already exist, so create it.
+	 * Unroll the first circuit of the loop to cover most cases.
+	 * Immediately a new create entry if the oldest is fresh.
+	 * Preserve penalized entries.
+	 * Try to make more entries if none are idle.
+	 * Steal the oldest entry if we cannot make more.
+	 */
+	e = ISC_LIST_TAIL(rrl->lru);
+	age = delta_rrl_time(e->last_used, now);
+	if (age <= rrl->window) {
+		for (;;) {
+			if (age <= 1) {
+				add_rrl_entries(rrl,
+						ISC_MIN((rrl->num_entries+1)/2,
+							1000));
+				e = ISC_LIST_TAIL(rrl->lru);
+				break;
+			}
+			if (response_balance(rrl, e, age) >= 0)
+				break;
+
+			e = e->lru.prev;
+			if (e == NULL) {
+				add_rrl_entries(rrl,
+						ISC_MIN((rrl->num_entries+1)/2,
+							1000));
+				e = ISC_LIST_TAIL(rrl->lru);
+				break;
+			}
+			age = delta_rrl_time(e->last_used, now);
+		}
+	}
+	if (e->logged)
+		log_end(rrl, e, log_buf, log_buf_len);
+	e->key = key;
+	e->last_used = 0;
+	rrl_entry_ref(rrl, e, new_bin, probes, now);
+	return (e);
+}
+
+static inline dns_rrl_result_t
+debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
+		const isc_sockaddr_t *client_addr, isc_stdtime_t now,
+		char *log_buf, unsigned int log_buf_len)
+{
+	int rate, new_rate, *ratep, slip, new_slip, age, log_secs, min;
+	const char *rate_str;
+	dns_rrl_entry_t const *credit_e;
+	dns_rrl_result_t rrl_result;
+
+	/*
+	 * Pick the rate counter.  Optionally adjust the rates by the estimated
+	 * query/second rate.
+	 */
+	switch (e->key.rtype) {
+	case DNS_RRL_RTYPE_QUERY:
+		rate = rrl->responses_per_second;
+		ratep = &rrl->scaled_responses_per_second;
+		break;
+	case DNS_RRL_RTYPE_NXDOMAIN:
+		rate = rrl->nxdomains_per_second;
+		ratep = &rrl->scaled_nxdomains_per_second;
+		break;
+	case DNS_RRL_RTYPE_ERROR:
+		rate = rrl->errors_per_second;
+		ratep = &rrl->scaled_errors_per_second;
+		break;
+	case DNS_RRL_RTYPE_ALL:
+		rate = rrl->all_per_second;
+		ratep = &rrl->scaled_all_per_second;
+		break;
+	default:
+		INSIST(0);
+	}
+	if (rate == 0)
+		return (DNS_RRL_RESULT_OK);
+
+	if (scale < 1.0) {
+		/*
+		 * The limit for clients that have used TCP is not scaled.
+		 */
+		credit_e = get_rrl_entry(rrl, client_addr,
+					 dns_rdatatype_none, NULL, 0,
+					 DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
+					 log_buf, log_buf_len);
+		if (credit_e != NULL) {
+			age = delta_rrl_time(credit_e->last_used, now);
+			if (age < rrl->window)
+				scale = 1.0;
+		}
+	}
+	if (scale < 1.0) {
+		new_rate = rate * scale;
+		if (new_rate < 1)
+			new_rate = 1;
+		if (*ratep != new_rate) {
+			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
+				switch (e->key.rtype) {
+				case DNS_RRL_RTYPE_QUERY:
+					rate_str = "responses-per-second";
+					break;
+				case DNS_RRL_RTYPE_NXDOMAIN:
+					rate_str = "nxdomains-per-second";
+					break;
+				case DNS_RRL_RTYPE_ERROR:
+					rate_str = "errors-per-second";
+					break;
+				case DNS_RRL_RTYPE_ALL:
+					rate_str = "all-per-second";
+					break;
+				default:
+					INSIST(0);
+				}
+				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+					      DNS_LOGMODULE_REQUEST,
+					      DNS_RRL_LOG_DEBUG1,
+					      "%d qps scaled %s by %.2f"
+					      " from %d to %d",
+					      (int)qps, rate_str, scale,
+					      rate, new_rate);
+			}
+			rate = new_rate;
+			*ratep = rate;
+		}
+	}
+
+	min = -rrl->window * rate;
+
+	/*
+	 * Treat time jumps into the past as no time.
+	 * Treat entries older than the window as if they were just created
+	 * Credit other entries.
+	 */
+	rrl_result = DNS_RRL_RESULT_DROP;
+	age = delta_rrl_time(e->last_used, now);
+	if (age > 0) {
+		/*
+		 * Credit tokens earned during elapsed time.
+		 */
+		if (age > rrl->window) {
+			e->responses = rate;
+			e->slip_cnt = 0;
+		} else {
+			e->responses += rate*age;
+			if (e->responses > rate) {
+				e->responses = rate;
+				e->slip_cnt = 0;
+			}
+		}
+		/*
+		 * Find the seconds since last log message without overflowing
+		 * small counter.
+		 * This counter should be reset when an entry is create (or
+		 * recycled) and after at least one second without limiting.
+		 * It is not necessarily reset when some requests are answered
+		 * provided other requests continue to be dropped or slipped.
+		 * This can happen when the request rate is just at the limit.
+		 */
+		if (e->logged) {
+			log_secs = e->log_secs;
+			log_secs += age;
+			if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
+				log_secs = DNS_RRL_MAX_LOG_SECS;
+			e->log_secs = log_secs;
+		}
+	}
+	e->last_used = now;
+
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
+			      "rrl age=%d  responses=%d", age, e->responses);
+
+	/*
+	 * Debit the entry for this response.
+	 */
+	if (--e->responses >= 0)
+		return (DNS_RRL_RESULT_OK);
+
+	if (e->responses < min)
+		e->responses = min;
+
+	/*
+	 * Drop this response unless it should leak.
+	 */
+	slip = rrl->slip;
+	if (slip > 2 && scale < 1.0) {
+		new_slip *= scale;
+		if (new_slip < 2)
+			new_slip = 2;
+		if (rrl->scaled_slip != new_slip) {
+			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1))
+				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+					      DNS_LOGMODULE_REQUEST,
+					      DNS_RRL_LOG_DEBUG1,
+					      "%d qps scaled slip"
+					      " by %.2f from %d to %d",
+					      (int)qps, scale,
+					      slip, new_slip);
+			slip = new_slip;
+			rrl->scaled_slip = slip;
+		}
+	}
+	if (slip != 0 && ++e->slip_cnt >= slip) {
+		e->slip_cnt = 0;
+		return (DNS_RRL_RESULT_SLIP);
+	}
+
+	return (rrl_result);
+}
+
+static inline dns_rrl_qname_buf_t *
+get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
+	dns_rrl_qname_buf_t *qbuf;
+
+	qbuf = rrl->qnames[e->log_qname];
+	if (qbuf == NULL  || qbuf->e != e)
+		return (NULL);
+	return (qbuf);
+}
+
+static inline void
+free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
+	dns_rrl_qname_buf_t *qbuf;
+
+	qbuf = get_qname(rrl, e);
+	if (qbuf != NULL) {
+		qbuf->e = NULL;
+		ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
+	}
+}
+
+static void
+add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len)
+{
+	isc_region_t region;
+
+	isc_buffer_availableregion(lb, &region);
+	if (str_len >= region.length) {
+		if (region.length <= 0)
+			return;
+		str_len = region.length;
+	}
+	memcpy(region.base, str, str_len);
+	isc_buffer_add(lb, str_len);
+}
+
+#define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
+
+/*
+ * Build strings for the logs
+ */
+static void
+make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+	     const char *str1, const char *str2, isc_boolean_t plural,
+	     dns_rrl_result_t rrl_result,
+	     dns_name_t *qname, isc_boolean_t save_qname, dns_rcode_t rcode,
+	     char *log_buf, unsigned int log_buf_len)
+{
+	isc_buffer_t lb;
+	dns_rrl_qname_buf_t *qbuf;
+	isc_netaddr_t cidr;
+	char strbuf[ISC_MAX(sizeof("/123"), sizeof("  (12345678)"))];
+	isc_result_t msg_result;
+
+	if (log_buf_len <= 1) {
+		if (log_buf_len == 1)
+			log_buf[0] = '\0';
+		return;
+	}
+	isc_buffer_init(&lb, log_buf, log_buf_len-1);
+
+	if (str1 != NULL)
+		add_log_str(&lb, str1, strlen(str1));
+	if (str2 != NULL)
+		add_log_str(&lb, str2, strlen(str2));
+
+	switch (rrl_result) {
+	case DNS_RRL_RESULT_OK:
+		break;
+	case DNS_RRL_RESULT_DROP:
+		ADD_LOG_CSTR(&lb, "drop ");
+		break;
+	case DNS_RRL_RESULT_SLIP:
+		ADD_LOG_CSTR(&lb, "slip ");
+		break;
+	default:
+		INSIST(0);
+		break;
+	}
+
+
+	switch (e->key.rtype) {
+	case DNS_RRL_RTYPE_QUERY:
+	case DNS_RRL_RTYPE_ALL:
+		break;
+	case DNS_RRL_RTYPE_NXDOMAIN:
+		ADD_LOG_CSTR(&lb, "NXDOMAIN ");
+		break;
+	case DNS_RRL_RTYPE_ERROR:
+		if (rcode == dns_rcode_noerror) {
+			ADD_LOG_CSTR(&lb, "error ");
+		} else {
+			msg_result = dns_rcode_totext(rcode, &lb);
+			if (msg_result == ISC_R_SUCCESS) {
+				ADD_LOG_CSTR(&lb, " ");
+			} else {
+				ADD_LOG_CSTR(&lb, "UNKNOWN RCODE ");
+			}
+		}
+		break;
+	default:
+		INSIST(0);
+	}
+
+	if (plural)
+		ADD_LOG_CSTR(&lb, "responses to ");
+	else
+		ADD_LOG_CSTR(&lb, "response to ");
+
+	memset(&cidr, 0, sizeof(cidr));
+	if (e->key.ipv6) {
+		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
+		cidr.family = AF_INET6;
+		memcpy(&cidr.type.in6, e->key.ip, sizeof(cidr.type.in6));
+	} else {
+		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
+		cidr.family = AF_INET;
+		cidr.type.in.s_addr = e->key.ip[3];
+	}
+	msg_result = isc_netaddr_totext(&cidr, &lb);
+	if (msg_result != ISC_R_SUCCESS)
+		ADD_LOG_CSTR(&lb, "?");
+	add_log_str(&lb, strbuf, strlen(strbuf));
+
+	if (e->key.rtype == DNS_RRL_RTYPE_QUERY ||
+	    e->key.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
+		qbuf = get_qname(rrl, e);
+		if (save_qname && qbuf == NULL &&
+		    qname != NULL && dns_name_isabsolute(qname)) {
+			/*
+			 * Capture the qname for the "stop limiting" message.
+			 */
+			qbuf = ISC_LIST_TAIL(rrl->qname_free);
+			if (qbuf != NULL) {
+				ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
+			} else if (rrl->num_qnames < DNS_RRL_NUM_QNAMES) {
+				qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
+				if (qbuf != NULL) {
+					memset(qbuf, 0, sizeof(*qbuf));
+					qbuf->index = rrl->num_qnames;
+					rrl->qnames[rrl->num_qnames++] = qbuf;
+				} else {
+					isc_log_write(dns_lctx,
+						      DNS_LOGCATEGORY_RRL,
+						      DNS_LOGMODULE_REQUEST,
+						      DNS_RRL_LOG_FAIL,
+						      "isc_mem_get(%d)"
+						      " failed for RRL qname",
+						      (int)sizeof(*qbuf));
+				}
+			}
+			if (qbuf != NULL) {
+				e->log_qname = qbuf->index;
+				qbuf->e = e;
+				dns_fixedname_init(&qbuf->qname);
+				dns_name_copy(qname,
+					      dns_fixedname_name(&qbuf->qname),
+					      NULL);
+			}
+		}
+		if (qbuf != NULL)
+			qname = dns_fixedname_name(&qbuf->qname);
+		if (qname != NULL) {
+			ADD_LOG_CSTR(&lb, " for ");
+			dns_name_totext(qname, ISC_TRUE, &lb);
+			ADD_LOG_CSTR(&lb, " ");
+		} else {
+			ADD_LOG_CSTR(&lb, " for (?) ");
+		}
+		dns_rdataclass_totext(e->key.qclass-2, &lb);
+		ADD_LOG_CSTR(&lb, " ");
+		dns_rdatatype_totext(e->key.qtype, &lb);
+		snprintf(strbuf, sizeof(strbuf), "  (%08x)", e->key.qname_hash);
+		add_log_str(&lb, strbuf, strlen(strbuf));
+	}
+
+	/*
+	 * We saved room for '\0'.
+	 */
+	log_buf[isc_buffer_usedlength(&lb)] = '\0';
+}
+
+static void
+log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+	char *log_buf, unsigned int log_buf_len)
+{
+	if (e->logged) {
+		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+			     "stop limiting ", ISC_TRUE,
+			     DNS_RRL_RESULT_OK, NULL, ISC_FALSE,
+			     dns_rcode_noerror, log_buf, log_buf_len);
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+			      "%s", log_buf);
+		free_qname(rrl, e);
+		e->logged = ISC_FALSE;
+	}
+}
+
+/*
+ * Log some messages for streams that have stopped being rate limited
+ * or really for buckets that are now idle after having done something.
+ */
+static void
+prune_qnames(dns_rrl_t *rrl, isc_stdtime_t now,
+	     char *log_buf, unsigned int log_buf_len)
+{
+	dns_rrl_entry_t *e, *e_prev;
+	isc_boolean_t move_ptr;
+	int cnt, age;
+
+	move_ptr = ISC_TRUE;
+	cnt = 8;
+
+	for (e = rrl->log_ended; e != NULL; e = e->lru.prev) {
+		e_prev = e;
+		if (!e->logged)
+			continue;
+
+		age = delta_rrl_time(e->last_used, now);
+		if (age <= rrl->window) {
+			rrl->prune_time = now;
+			break;
+		}
+
+		if (age < DNS_RRL_STOP_LOG_SECS ||
+		    response_balance(rrl, e, age) < 0) {
+			move_ptr = ISC_FALSE;
+			continue;
+		}
+
+		log_end(rrl, e, log_buf, log_buf_len);
+
+		/*
+		 * Do not log many messages at once to avoid stalling real work.
+		 */
+		if (--cnt <= 0)
+			break;
+	}
+	if (e == NULL)
+		rrl->prune_time = now;
+	if (move_ptr)
+		rrl->log_ended = e_prev;
+}
+
+/*
+ * Main rate limit interface.
+ */
+dns_rrl_result_t
+dns_rrl(dns_view_t *view,
+	const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
+	dns_rdataclass_t qclass, dns_rdatatype_t qtype,
+	dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now,
+	isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
+{
+	dns_rrl_t *rrl;
+	dns_rrl_rtype_t rtype;
+	dns_rrl_entry_t *e;
+	isc_netaddr_t netclient;
+	int secs;
+	double qps, scale;
+	int exempt_match;
+	isc_result_t result;
+	dns_rrl_result_t rrl_result;
+
+	INSIST(log_buf != NULL && log_buf_len > 0);
+
+	rrl = view->rrl;
+	if (rrl->exempt != NULL) {
+		isc_netaddr_fromsockaddr(&netclient, client_addr);
+		result = dns_acl_match(&netclient, NULL, rrl->exempt,
+				       &view->aclenv, &exempt_match, NULL);
+		if (result == ISC_R_SUCCESS && exempt_match > 0)
+			return (DNS_RRL_RESULT_OK);
+	}
+
+	LOCK(&rrl->lock);
+
+	/*
+	 * Estimate total query per second rate when scaling by qps.
+	 */
+	if (rrl->qps_scale == 0) {
+		qps = 0.0;
+		scale = 1.0;
+	} else {
+		++rrl->qps_responses;
+		secs = delta_rrl_time(rrl->qps_time, now);
+		if (secs <= 0) {
+			qps = rrl->qps;
+		} else {
+			qps = (1.0*rrl->qps_responses) / secs;
+			if (secs >= rrl->window) {
+				if (isc_log_wouldlog(dns_lctx,
+						     DNS_RRL_LOG_DEBUG3))
+					isc_log_write(dns_lctx,
+						      DNS_LOGCATEGORY_RRL,
+						      DNS_LOGMODULE_REQUEST,
+						      DNS_RRL_LOG_DEBUG3,
+						      "%d responses/%d seconds"
+						      " = %d qps",
+						      rrl->qps_responses, secs,
+						      (int)qps);
+				rrl->qps = qps;
+				rrl->qps_responses = 0;
+				rrl->qps_time = now;
+			} else if (qps < rrl->qps) {
+				qps = rrl->qps;
+			}
+		}
+		scale = rrl->qps_scale / qps;
+	}
+
+	if (rrl->prune_time != now)
+		prune_qnames(rrl, now, log_buf, log_buf_len);
+
+	/*
+	 * Notice TCP responses when scaling limits by qps.
+	 * Do not try to rate limit TCP responses.
+	 */
+	if (is_tcp) {
+		if (scale < 1.0) {
+			e = get_rrl_entry(rrl, client_addr,
+					  dns_rdatatype_none, NULL, 0,
+					  DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
+					  log_buf, log_buf_len);
+			if (e != NULL) {
+				e->responses = -(rrl->window+1);
+				e->last_used = now;
+			}
+		}
+		UNLOCK(&rrl->lock);
+		return (ISC_R_SUCCESS);
+	}
+
+	/*
+	 * Find the right kind of entry, creating it if necessary.
+	 * If that is impossible, then nothing more can be done
+	 */
+	if (rcode == dns_rcode_noerror)
+		rtype = DNS_RRL_RTYPE_QUERY;
+	else if (rcode == dns_rcode_nxdomain)
+		rtype = DNS_RRL_RTYPE_NXDOMAIN;
+	else
+		rtype = DNS_RRL_RTYPE_ERROR;
+	e = get_rrl_entry(rrl, client_addr, qtype, qname, qclass, rtype,
+			  now, ISC_TRUE, log_buf, log_buf_len);
+	if (e == NULL) {
+		UNLOCK(&rrl->lock);
+		return (DNS_RRL_RESULT_OK);
+	}
+
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
+		/*
+		 * Do not worry about speed or releasing the lock.
+		 * This message appears before messages from debit_rrl_entry().
+		 */
+		make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
+			     DNS_RRL_RESULT_OK, qname, ISC_FALSE,
+			     rcode, log_buf, log_buf_len);
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
+			      "%s", log_buf);
+	}
+
+	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
+				     log_buf, log_buf_len);
+
+	if (rrl->all_per_second != 0) {
+		/*
+		 * We must debit the all-per-second token bucket if we have
+		 * an all-per-second limit for the IP address.
+		 * The all-per-second limit determines the log message
+		 * when both limits are hit.
+		 */
+		dns_rrl_entry_t *e_all;
+		dns_rrl_result_t rrl_all_result;
+
+		e_all = get_rrl_entry(rrl, client_addr,
+				      dns_rdatatype_none, NULL, 0,
+				      DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
+				      log_buf, log_buf_len);
+		if (e_all == NULL) {
+			UNLOCK(&rrl->lock);
+			return (DNS_RRL_RESULT_OK);
+		}
+		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
+						 client_addr, now,
+						 log_buf, log_buf_len);
+		if (rrl_all_result != DNS_RRL_RESULT_OK) {
+			int level;
+
+			e = e_all;
+			if (rrl_result == DNS_RRL_RESULT_OK)
+				level = DNS_RRL_LOG_DEBUG2;
+			else
+				level = DNS_RRL_LOG_DEBUG1;
+			rrl_result = rrl_all_result;
+			if (isc_log_wouldlog(dns_lctx, level)) {
+				make_log_buf(rrl, e,
+					     "prefer all-per-second limiting ",
+					     NULL, ISC_TRUE, DNS_RRL_RESULT_OK,
+					     NULL, ISC_FALSE, dns_rcode_noerror,
+					     log_buf, log_buf_len);
+				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+					      DNS_LOGMODULE_REQUEST, level,
+					      "%s", log_buf);
+			}
+		}
+	}
+
+	if (rrl_result == DNS_RRL_RESULT_OK) {
+		UNLOCK(&rrl->lock);
+		return (DNS_RRL_RESULT_OK);
+	}
+
+	/*
+	 * Log occassionally in the rate-limit category.
+	 */
+	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
+	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
+		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+			     e->logged ? "continue limiting " : "limit ",
+			     ISC_TRUE, DNS_RRL_RESULT_OK,
+			     qname, ISC_TRUE, rcode, log_buf, log_buf_len);
+		e->logged = ISC_TRUE;
+		e->log_secs = 0;
+		/*
+		 * Avoid holding the lock.
+		 */
+		if (!wouldlog) {
+			UNLOCK(&rrl->lock);
+			e = NULL;
+		}
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+			      "%s", log_buf);
+	}
+
+	/*
+	 * Make a log message for the caller.
+	 */
+	if (wouldlog)
+		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+			     NULL, ISC_FALSE, rrl_result,
+			     qname, ISC_FALSE, rcode, log_buf, log_buf_len);
+
+	if (e != NULL) {
+		/*
+		 * Do not save the qname unless we might needed it for
+		 * the ending log message.
+		 */
+		if (!e->logged)
+			free_qname(rrl, e);
+		UNLOCK(&rrl->lock);
+	}
+	return (rrl_result);
+}
+
+void
+dns_rrl_view_destroy(dns_view_t *view) {
+	dns_rrl_t *rrl;
+	dns_rrl_block_t *b;
+	dns_rrl_hash_t *h;
+	int i;
+
+	rrl = view->rrl;
+	if (rrl == NULL)
+		return;
+	view->rrl = NULL;
+
+	/*
+	 * Assume the caller takes care of locking the view and anything else.
+	 */
+	do {
+		char log_buf[DNS_RRL_LOG_BUF_LEN];
+
+		prune_qnames(rrl, rrl->prune_time+DNS_RRL_MAX_WINDOW+1,
+			      log_buf, sizeof(log_buf));
+	} while (rrl->log_ended->lru.prev != NULL);
+
+	for (i = 0; i < DNS_RRL_NUM_QNAMES; ++i) {
+		if (rrl->qnames[i] == NULL)
+			break;
+		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
+	}
+
+	if (rrl->exempt != NULL)
+		dns_acl_detach(&rrl->exempt);
+
+	DESTROYLOCK(&rrl->lock);
+
+	while (!ISC_LIST_EMPTY(rrl->blocks)) {
+		b = ISC_LIST_HEAD(rrl->blocks);
+		ISC_LIST_UNLINK(rrl->blocks, b, link);
+		isc_mem_put(rrl->mctx, b, b->size);
+	}
+
+	h = rrl->hash;
+	if (h != NULL)
+		isc_mem_put(rrl->mctx, h,
+			    sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+	h = rrl->old_hash;
+	if (h != NULL)
+		isc_mem_put(rrl->mctx, h,
+			    sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+	isc_mem_put(rrl->mctx, rrl, sizeof(*rrl));
+}
+
+isc_result_t
+dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
+	dns_rrl_t *rrl;
+	isc_result_t result;
+
+	*rrlp = NULL;
+
+	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
+	if (rrl == NULL)
+		return (ISC_R_NOMEMORY);
+	memset(rrl, 0, sizeof(*rrl));
+	rrl->mctx = view->mctx;
+	result = isc_mutex_init(&rrl->lock);
+	if (result != ISC_R_SUCCESS) {
+		isc_mem_put(view->mctx, rrl, sizeof(*rrl));
+		return (result);
+	}
+
+	view->rrl = rrl;
+
+	result = add_rrl_entries(rrl, min_entries);
+	if (result != ISC_R_SUCCESS) {
+		dns_rrl_view_destroy(view);
+		return (result);
+	}
+	result = expand_rrl_hash(rrl, 0);
+	if (result != ISC_R_SUCCESS) {
+		dns_rrl_view_destroy(view);
+		return (result);
+	}
+
+	*rrlp = rrl;
+	return (ISC_R_SUCCESS);
+}
diff -r -u lib/dns/view.c-orig lib/dns/view.c
--- lib/dns/view.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/view.c	2004-01-01 00:00:00.000000000 +0000
@@ -48,6 +48,7 @@
 #include <dns/masterdump.h>
 #include <dns/order.h>
 #include <dns/peer.h>
+#include <dns/rrl.h>
 #include <dns/rbt.h>
 #include <dns/rdataset.h>
 #include <dns/request.h>
@@ -181,6 +182,7 @@
 	view->answeracl_exclude = NULL;
 	view->denyanswernames = NULL;
 	view->answernames_exclude = NULL;
+	view->rrl = NULL;
 	view->provideixfr = ISC_TRUE;
 	view->maxcachettl = 7 * 24 * 3600;
 	view->maxncachettl = 3 * 3600;
@@ -331,9 +333,11 @@
 		dns_acache_detach(&view->acache);
 	}
 	dns_rpz_view_destroy(view);
+	dns_rrl_view_destroy(view);
 #else
 	INSIST(view->acache == NULL);
 	INSIST(ISC_LIST_EMPTY(view->rpz_zones));
+	INSIST(view->rrl == NULL);
 #endif
 	if (view->requestmgr != NULL)
 		dns_requestmgr_detach(&view->requestmgr);
diff -r -u lib/isccfg/namedconf.c-orig lib/isccfg/namedconf.c
--- lib/isccfg/namedconf.c-orig	2004-01-01 00:00:00.000000000 +0000
+++ lib/isccfg/namedconf.c	2004-01-01 00:00:00.000000000 +0000
@@ -1244,6 +1244,39 @@
 };
 
 
+/*
+ * rate-limit
+ */
+static cfg_clausedef_t rrl_clauses[] = {
+	{ "responses-per-second", &cfg_type_uint32, 0 },
+	{ "errors-per-second", &cfg_type_uint32, 0 },
+	{ "nxdomains-per-second", &cfg_type_uint32, 0 },
+	{ "responses-per-second", &cfg_type_uint32, 0 },
+	{ "all-per-second", &cfg_type_uint32, 0 },
+	{ "slip", &cfg_type_uint32, 0 },
+	{ "window", &cfg_type_uint32, 0 },
+	{ "log-only", &cfg_type_boolean, 0 },
+	{ "qps-scale", &cfg_type_uint32, 0 },
+	{ "IPv4-prefix-length", &cfg_type_uint32, 0 },
+	{ "IPv6-prefix-length", &cfg_type_uint32, 0 },
+	{ "exempt-clients", &cfg_type_bracketed_aml, 0 },
+	{ "max-table-size", &cfg_type_uint32, 0 },
+	{ "min-table-size", &cfg_type_uint32, 0 },
+	{ NULL, NULL, 0 }
+};
+
+static cfg_clausedef_t *rrl_clausesets[] = {
+	rrl_clauses,
+	NULL
+};
+
+static cfg_type_t cfg_type_rrl = {
+	"rate-limit", cfg_parse_map, cfg_print_map, cfg_doc_map,
+	&cfg_rep_map, rrl_clausesets
+};
+
+
+
 /*%
  * dnssec-lookaside
  */
@@ -1397,6 +1430,7 @@
 	   CFG_CLAUSEFLAG_NOTCONFIGURED },
 #endif
 	{ "response-policy", &cfg_type_rpz, 0 },
+	{ "rate-limit", &cfg_type_rrl, 0 },
 	{ NULL, NULL, 0 }
 };
 
openSUSE Build Service is sponsored by