1 /*
2  * Copyright (C) 2013-2015  Internet Systems Consortium, Inc. ("ISC")
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
13  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 /*! \file */
18 
19 /*
20  * Rate limit DNS responses.
21  */
22 
23 /* #define ISC_LIST_CHECKINIT */
24 
25 #include <config.h>
26 #include <isc/mem.h>
27 #include <isc/net.h>
28 #include <isc/netaddr.h>
29 #include <isc/print.h>
30 
31 #include <dns/result.h>
32 #include <dns/rcode.h>
33 #include <dns/rdatatype.h>
34 #include <dns/rdataclass.h>
35 #include <dns/log.h>
36 #include <dns/rrl.h>
37 #include <dns/view.h>
38 
/*
 * Forward declaration: log_end() is defined further down in this file,
 * but get_entry() calls it before that definition is reached.
 */
static void
log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
	char *log_buf, unsigned int log_buf_len);
42 
/*
 * Get a modulus for a hash function that is tolerably likely to be
 * relatively prime to most inputs.  Of course, we get a prime for initial
 * values not larger than the square of the last prime.  We often get a prime
 * after that.
 * This works well in practice for hash tables up to at least 100
 * times the square of the last prime and better than a multiplicative hash.
 */
51 static int
hash_divisor(unsigned int initial)52 hash_divisor(unsigned int initial) {
53 	static isc_uint16_t primes[] = {
54 		  3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,
55 		 43,  47,  53,  59,  61,  67,  71,  73,  79,  83,  89,  97,
56 #if 0
57 		101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
58 		163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
59 		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
60 		293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
61 		373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
62 		443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
63 		521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
64 		601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
65 		673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
66 		757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
67 		839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
68 		929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
69 #endif
70 	};
71 	int divisions, tries;
72 	unsigned int result;
73 	isc_uint16_t *pp, p;
74 
75 	result = initial;
76 
77 	if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) {
78 		pp = primes;
79 		while (*pp < result)
80 			++pp;
81 		return (*pp);
82 	}
83 
84 	if ((result & 1) == 0)
85 		++result;
86 
87 	divisions = 0;
88 	tries = 1;
89 	pp = primes;
90 	do {
91 		p = *pp++;
92 		++divisions;
93 		if ((result % p) == 0) {
94 			++tries;
95 			result += 2;
96 			pp = primes;
97 		}
98 	} while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);
99 
100 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
101 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
102 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
103 			      "%d hash_divisor() divisions in %d tries"
104 			      " to get %d from %d",
105 			      divisions, tries, result, initial);
106 
107 	return (result);
108 }
109 
110 /*
111  * Convert a timestamp to a number of seconds in the past.
112  */
113 static inline int
delta_rrl_time(isc_stdtime_t ts,isc_stdtime_t now)114 delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
115 	int delta;
116 
117 	delta = now - ts;
118 	if (delta >= 0)
119 		return (delta);
120 
121 	/*
122 	 * The timestamp is in the future.  That future might result from
123 	 * re-ordered requests, because we use timestamps on requests
124 	 * instead of consulting a clock.  Timestamps in the distant future are
125 	 * assumed to result from clock changes.  When the clock changes to
126 	 * the past, make existing timestamps appear to be in the past.
127 	 */
128 	if (delta < -DNS_RRL_MAX_TIME_TRAVEL)
129 		return (DNS_RRL_FOREVER);
130 	return (0);
131 }
132 
133 static inline int
get_age(const dns_rrl_t * rrl,const dns_rrl_entry_t * e,isc_stdtime_t now)134 get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
135 	if (!e->ts_valid)
136 		return (DNS_RRL_FOREVER);
137 	return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
138 }
139 
140 static inline void
set_age(dns_rrl_t * rrl,dns_rrl_entry_t * e,isc_stdtime_t now)141 set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
142 	dns_rrl_entry_t *e_old;
143 	unsigned int ts_gen;
144 	int i, ts;
145 
146 	ts_gen = rrl->ts_gen;
147 	ts = now - rrl->ts_bases[ts_gen];
148 	if (ts < 0) {
149 		if (ts < -DNS_RRL_MAX_TIME_TRAVEL)
150 			ts = DNS_RRL_FOREVER;
151 		else
152 			ts = 0;
153 	}
154 
155 	/*
156 	 * Make a new timestamp base if the current base is too old.
157 	 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
158 	 * useless history.  Their timestamps can be treated as if they are
159 	 * all the same.
160 	 * We only do arithmetic on more recent timestamps, so bases for
161 	 * older timestamps can be recycled provided the old timestamps are
162 	 * marked as ancient history.
163 	 * This loop is almost always very short because most entries are
164 	 * recycled after one second and any entries that need to be marked
165 	 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
166 	 */
167 	if (ts >= DNS_RRL_MAX_TS) {
168 		ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
169 		for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
170 		     e_old != NULL && (e_old->ts_gen == ts_gen ||
171 				       !ISC_LINK_LINKED(e_old, hlink));
172 		     e_old = ISC_LIST_PREV(e_old, lru), ++i)
173 		{
174 			e_old->ts_valid = ISC_FALSE;
175 		}
176 		if (i != 0)
177 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
178 				      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
179 				      "rrl new time base scanned %d entries"
180 				      " at %d for %d %d %d %d",
181 				      i, now, rrl->ts_bases[ts_gen],
182 				      rrl->ts_bases[(ts_gen + 1) %
183 					DNS_RRL_TS_BASES],
184 				      rrl->ts_bases[(ts_gen + 2) %
185 					DNS_RRL_TS_BASES],
186 				      rrl->ts_bases[(ts_gen + 3) %
187 					DNS_RRL_TS_BASES]);
188 		rrl->ts_gen = ts_gen;
189 		rrl->ts_bases[ts_gen] = now;
190 		ts = 0;
191 	}
192 
193 	e->ts_gen = ts_gen;
194 	e->ts = ts;
195 	e->ts_valid = ISC_TRUE;
196 }
197 
198 static isc_result_t
expand_entries(dns_rrl_t * rrl,int new)199 expand_entries(dns_rrl_t *rrl, int new) {
200 	unsigned int bsize;
201 	dns_rrl_block_t *b;
202 	dns_rrl_entry_t *e;
203 	double rate;
204 	int i;
205 
206 	if (rrl->num_entries + new >= rrl->max_entries &&
207 	    rrl->max_entries != 0)
208 	{
209 		new = rrl->max_entries - rrl->num_entries;
210 		if (new <= 0)
211 			return (ISC_R_SUCCESS);
212 	}
213 
214 	/*
215 	 * Log expansions so that the user can tune max-table-size
216 	 * and min-table-size.
217 	 */
218 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
219 	    rrl->hash != NULL) {
220 		rate = rrl->probes;
221 		if (rrl->searches != 0)
222 			rate /= rrl->searches;
223 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
224 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
225 			      "increase from %d to %d RRL entries with"
226 			      " %d bins; average search length %.1f",
227 			      rrl->num_entries, rrl->num_entries+new,
228 			      rrl->hash->length, rate);
229 	}
230 
231 	bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t);
232 	b = isc_mem_get(rrl->mctx, bsize);
233 	if (b == NULL) {
234 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
235 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
236 			      "isc_mem_get(%d) failed for RRL entries",
237 			      bsize);
238 		return (ISC_R_NOMEMORY);
239 	}
240 	memset(b, 0, bsize);
241 	b->size = bsize;
242 
243 	e = b->entries;
244 	for (i = 0; i < new; ++i, ++e) {
245 		ISC_LINK_INIT(e, hlink);
246 		ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
247 	}
248 	rrl->num_entries += new;
249 	ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
250 
251 	return (ISC_R_SUCCESS);
252 }
253 
254 static inline dns_rrl_bin_t *
get_bin(dns_rrl_hash_t * hash,unsigned int hval)255 get_bin(dns_rrl_hash_t *hash, unsigned int hval) {
256 	INSIST(hash != NULL);
257 	return (&hash->bins[hval % hash->length]);
258 }
259 
260 static void
free_old_hash(dns_rrl_t * rrl)261 free_old_hash(dns_rrl_t *rrl) {
262 	dns_rrl_hash_t *old_hash;
263 	dns_rrl_bin_t *old_bin;
264 	dns_rrl_entry_t *e, *e_next;
265 
266 	old_hash = rrl->old_hash;
267 	for (old_bin = &old_hash->bins[0];
268 	     old_bin < &old_hash->bins[old_hash->length];
269 	     ++old_bin)
270 	{
271 		for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
272 			e_next = ISC_LIST_NEXT(e, hlink);
273 			ISC_LINK_INIT(e, hlink);
274 		}
275 	}
276 
277 	isc_mem_put(rrl->mctx, old_hash,
278 		    sizeof(*old_hash)
279 		      + (old_hash->length - 1) * sizeof(old_hash->bins[0]));
280 	rrl->old_hash = NULL;
281 }
282 
283 static isc_result_t
expand_rrl_hash(dns_rrl_t * rrl,isc_stdtime_t now)284 expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
285 	dns_rrl_hash_t *hash;
286 	int old_bins, new_bins, hsize;
287 	double rate;
288 
289 	if (rrl->old_hash != NULL)
290 		free_old_hash(rrl);
291 
292 	/*
293 	 * Most searches fail and so go to the end of the chain.
294 	 * Use a small hash table load factor.
295 	 */
296 	old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
297 	new_bins = old_bins/8 + old_bins;
298 	if (new_bins < rrl->num_entries)
299 		new_bins = rrl->num_entries;
300 	new_bins = hash_divisor(new_bins);
301 
302 	hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]);
303 	hash = isc_mem_get(rrl->mctx, hsize);
304 	if (hash == NULL) {
305 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
306 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
307 			      "isc_mem_get(%d) failed for"
308 			      " RRL hash table",
309 			      hsize);
310 		return (ISC_R_NOMEMORY);
311 	}
312 	memset(hash, 0, hsize);
313 	hash->length = new_bins;
314 	rrl->hash_gen ^= 1;
315 	hash->gen = rrl->hash_gen;
316 
317 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
318 		rate = rrl->probes;
319 		if (rrl->searches != 0)
320 			rate /= rrl->searches;
321 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
322 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
323 			      "increase from %d to %d RRL bins for"
324 			      " %d entries; average search length %.1f",
325 			      old_bins, new_bins, rrl->num_entries, rate);
326 	}
327 
328 	rrl->old_hash = rrl->hash;
329 	if (rrl->old_hash != NULL)
330 		rrl->old_hash->check_time = now;
331 	rrl->hash = hash;
332 
333 	return (ISC_R_SUCCESS);
334 }
335 
336 static void
ref_entry(dns_rrl_t * rrl,dns_rrl_entry_t * e,int probes,isc_stdtime_t now)337 ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
338 	/*
339 	 * Make the entry most recently used.
340 	 */
341 	if (ISC_LIST_HEAD(rrl->lru) != e) {
342 		if (e == rrl->last_logged)
343 			rrl->last_logged = ISC_LIST_PREV(e, lru);
344 		ISC_LIST_UNLINK(rrl->lru, e, lru);
345 		ISC_LIST_PREPEND(rrl->lru, e, lru);
346 	}
347 
348 	/*
349 	 * Expand the hash table if it is time and necessary.
350 	 * This will leave the newly referenced entry in a chain in the
351 	 * old hash table.  It will migrate to the new hash table the next
352 	 * time it is used or be cut loose when the old hash table is destroyed.
353 	 */
354 	rrl->probes += probes;
355 	++rrl->searches;
356 	if (rrl->searches > 100 &&
357 	    delta_rrl_time(rrl->hash->check_time, now) > 1) {
358 		if (rrl->probes/rrl->searches > 2)
359 			expand_rrl_hash(rrl, now);
360 		rrl->hash->check_time = now;
361 		rrl->probes = 0;
362 		rrl->searches = 0;
363 	}
364 }
365 
366 static inline isc_boolean_t
key_cmp(const dns_rrl_key_t * a,const dns_rrl_key_t * b)367 key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
368 	if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0)
369 		return (ISC_TRUE);
370 	return (ISC_FALSE);
371 }
372 
373 static inline isc_uint32_t
hash_key(const dns_rrl_key_t * key)374 hash_key(const dns_rrl_key_t *key) {
375 	isc_uint32_t hval;
376 	int i;
377 
378 	hval = key->w[0];
379 	for (i = sizeof(key->w) / sizeof(key->w[0]) - 1; i >= 0; --i) {
380 		hval = key->w[i] + (hval<<1);
381 	}
382 	return (hval);
383 }
384 
385 /*
386  * Construct the hash table key.
387  * Use a hash of the DNS query name to save space in the database.
388  * Collisions result in legitimate rate limiting responses for one
389  * query name also limiting responses for other names to the
390  * same client.  This is rare and benign enough given the large
391  * space costs compared to keeping the entire name in the database
392  * entry or the time costs of dynamic allocation.
393  */
394 static void
make_key(const dns_rrl_t * rrl,dns_rrl_key_t * key,const isc_sockaddr_t * client_addr,dns_rdatatype_t qtype,dns_name_t * qname,dns_rdataclass_t qclass,dns_rrl_rtype_t rtype)395 make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
396 	 const isc_sockaddr_t *client_addr,
397 	 dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
398 	 dns_rrl_rtype_t rtype)
399 {
400 	dns_name_t base;
401 	dns_offsets_t base_offsets;
402 	int labels, i;
403 
404 	memset(key, 0, sizeof(*key));
405 
406 	key->s.rtype = rtype;
407 	if (rtype == DNS_RRL_RTYPE_QUERY) {
408 		key->s.qtype = qtype;
409 		key->s.qclass = qclass & 0xff;
410 	} else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
411 		   rtype == DNS_RRL_RTYPE_NODATA) {
412 		/*
413 		 * Because there is no qtype in the empty answer sections of
414 		 * referral and NODATA responses, count them as the same.
415 		 */
416 		key->s.qclass = qclass & 0xff;
417 	}
418 
419 	if (qname != NULL && qname->labels != 0) {
420 		/*
421 		 * Ignore the first label of wildcards.
422 		 */
423 		if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
424 		    (labels = dns_name_countlabels(qname)) > 1)
425 		{
426 			dns_name_init(&base, base_offsets);
427 			dns_name_getlabelsequence(qname, 1, labels-1, &base);
428 			key->s.qname_hash = dns_name_hashbylabel(&base,
429 							ISC_FALSE);
430 		} else {
431 			key->s.qname_hash = dns_name_hashbylabel(qname,
432 							ISC_FALSE);
433 		}
434 	}
435 
436 	switch (client_addr->type.sa.sa_family) {
437 	case AF_INET:
438 		key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
439 			      rrl->ipv4_mask);
440 		break;
441 	case AF_INET6:
442 		key->s.ipv6 = ISC_TRUE;
443 		memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
444 			sizeof(key->s.ip));
445 		for (i = 0; i < DNS_RRL_MAX_PREFIX/32; ++i)
446 			key->s.ip[i] &= rrl->ipv6_mask[i];
447 		break;
448 	}
449 }
450 
451 static inline dns_rrl_rate_t *
get_rate(dns_rrl_t * rrl,dns_rrl_rtype_t rtype)452 get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
453 	switch (rtype) {
454 	case DNS_RRL_RTYPE_QUERY:
455 		return (&rrl->responses_per_second);
456 	case DNS_RRL_RTYPE_REFERRAL:
457 		return (&rrl->referrals_per_second);
458 	case DNS_RRL_RTYPE_NODATA:
459 		return (&rrl->nodata_per_second);
460 	case DNS_RRL_RTYPE_NXDOMAIN:
461 		return (&rrl->nxdomains_per_second);
462 	case DNS_RRL_RTYPE_ERROR:
463 		return (&rrl->errors_per_second);
464 	case DNS_RRL_RTYPE_ALL:
465 		return (&rrl->all_per_second);
466 	default:
467 		INSIST(0);
468 	}
469 	return (NULL);
470 }
471 
472 static int
response_balance(dns_rrl_t * rrl,const dns_rrl_entry_t * e,int age)473 response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
474 	dns_rrl_rate_t *ratep;
475 	int balance, rate;
476 
477 	if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
478 		rate = 1;
479 	} else {
480 		ratep = get_rate(rrl, e->key.s.rtype);
481 		rate = ratep->scaled;
482 	}
483 
484 	balance = e->responses + age * rate;
485 	if (balance > rate)
486 		balance = rate;
487 	return (balance);
488 }
489 
490 /*
491  * Search for an entry for a response and optionally create it.
492  */
493 static dns_rrl_entry_t *
get_entry(dns_rrl_t * rrl,const isc_sockaddr_t * client_addr,dns_rdataclass_t qclass,dns_rdatatype_t qtype,dns_name_t * qname,dns_rrl_rtype_t rtype,isc_stdtime_t now,isc_boolean_t create,char * log_buf,unsigned int log_buf_len)494 get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
495 	  dns_rdataclass_t qclass, dns_rdatatype_t qtype, dns_name_t *qname,
496 	  dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
497 	  char *log_buf, unsigned int log_buf_len)
498 {
499 	dns_rrl_key_t key;
500 	isc_uint32_t hval;
501 	dns_rrl_entry_t *e;
502 	dns_rrl_hash_t *hash;
503 	dns_rrl_bin_t *new_bin, *old_bin;
504 	int probes, age;
505 
506 	make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
507 	hval = hash_key(&key);
508 
509 	/*
510 	 * Look for the entry in the current hash table.
511 	 */
512 	new_bin = get_bin(rrl->hash, hval);
513 	probes = 1;
514 	e = ISC_LIST_HEAD(*new_bin);
515 	while (e != NULL) {
516 		if (key_cmp(&e->key, &key)) {
517 			ref_entry(rrl, e, probes, now);
518 			return (e);
519 		}
520 		++probes;
521 		e = ISC_LIST_NEXT(e, hlink);
522 	}
523 
524 	/*
525 	 * Look in the old hash table.
526 	 */
527 	if (rrl->old_hash != NULL) {
528 		old_bin = get_bin(rrl->old_hash, hval);
529 		e = ISC_LIST_HEAD(*old_bin);
530 		while (e != NULL) {
531 			if (key_cmp(&e->key, &key)) {
532 				ISC_LIST_UNLINK(*old_bin, e, hlink);
533 				ISC_LIST_PREPEND(*new_bin, e, hlink);
534 				e->hash_gen = rrl->hash_gen;
535 				ref_entry(rrl, e, probes, now);
536 				return (e);
537 			}
538 			e = ISC_LIST_NEXT(e, hlink);
539 		}
540 
541 		/*
542 		 * Discard prevous hash table when all of its entries are old.
543 		 */
544 		age = delta_rrl_time(rrl->old_hash->check_time, now);
545 		if (age > rrl->window)
546 			free_old_hash(rrl);
547 	}
548 
549 	if (!create)
550 		return (NULL);
551 
552 	/*
553 	 * The entry does not exist, so create it by finding a free entry.
554 	 * Keep currently penalized and logged entries.
555 	 * Try to make more entries if none are idle.
556 	 * Steal the oldest entry if we cannot create more.
557 	 */
558 	for (e = ISC_LIST_TAIL(rrl->lru);
559 	     e != NULL;
560 	     e = ISC_LIST_PREV(e, lru))
561 	{
562 		if (!ISC_LINK_LINKED(e, hlink))
563 			break;
564 		age = get_age(rrl, e, now);
565 		if (age <= 1) {
566 			e = NULL;
567 			break;
568 		}
569 		if (!e->logged && response_balance(rrl, e, age) > 0)
570 			break;
571 	}
572 	if (e == NULL) {
573 		expand_entries(rrl, ISC_MIN((rrl->num_entries+1)/2, 1000));
574 		e = ISC_LIST_TAIL(rrl->lru);
575 	}
576 	if (e->logged)
577 		log_end(rrl, e, ISC_TRUE, log_buf, log_buf_len);
578 	if (ISC_LINK_LINKED(e, hlink)) {
579 		if (e->hash_gen == rrl->hash_gen)
580 			hash = rrl->hash;
581 		else
582 			hash = rrl->old_hash;
583 		old_bin = get_bin(hash, hash_key(&e->key));
584 		ISC_LIST_UNLINK(*old_bin, e, hlink);
585 	}
586 	ISC_LIST_PREPEND(*new_bin, e, hlink);
587 	e->hash_gen = rrl->hash_gen;
588 	e->key = key;
589 	e->ts_valid = ISC_FALSE;
590 	ref_entry(rrl, e, probes, now);
591 	return (e);
592 }
593 
594 static void
debit_log(const dns_rrl_entry_t * e,int age,const char * action)595 debit_log(const dns_rrl_entry_t *e, int age, const char *action) {
596 	char buf[sizeof("age=12345678")];
597 	const char *age_str;
598 
599 	if (age == DNS_RRL_FOREVER) {
600 		age_str = "";
601 	} else {
602 		snprintf(buf, sizeof(buf), "age=%d", age);
603 		age_str = buf;
604 	}
605 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
606 		      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
607 		      "rrl %08x %6s  responses=%-3d %s",
608 		      hash_key(&e->key), age_str, e->responses, action);
609 }
610 
611 static inline dns_rrl_result_t
debit_rrl_entry(dns_rrl_t * rrl,dns_rrl_entry_t * e,double qps,double scale,const isc_sockaddr_t * client_addr,isc_stdtime_t now,char * log_buf,unsigned int log_buf_len)612 debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
613 		const isc_sockaddr_t *client_addr, isc_stdtime_t now,
614 		char *log_buf, unsigned int log_buf_len)
615 {
616 	int rate, new_rate, slip, new_slip, age, log_secs, min;
617 	dns_rrl_rate_t *ratep;
618 	dns_rrl_entry_t const *credit_e;
619 
620 	/*
621 	 * Pick the rate counter.
622 	 * Optionally adjust the rate by the estimated query/second rate.
623 	 */
624 	ratep = get_rate(rrl, e->key.s.rtype);
625 	rate = ratep->r;
626 	if (rate == 0)
627 		return (DNS_RRL_RESULT_OK);
628 
629 	if (scale < 1.0) {
630 		/*
631 		 * The limit for clients that have used TCP is not scaled.
632 		 */
633 		credit_e = get_entry(rrl, client_addr,
634 				     0, dns_rdatatype_none, NULL,
635 				     DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
636 				     log_buf, log_buf_len);
637 		if (credit_e != NULL) {
638 			age = get_age(rrl, e, now);
639 			if (age < rrl->window)
640 				scale = 1.0;
641 		}
642 	}
643 	if (scale < 1.0) {
644 		new_rate = (int) (rate * scale);
645 		if (new_rate < 1)
646 			new_rate = 1;
647 		if (ratep->scaled != new_rate) {
648 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
649 				      DNS_LOGMODULE_REQUEST,
650 				      DNS_RRL_LOG_DEBUG1,
651 				      "%d qps scaled %s by %.2f"
652 				      " from %d to %d",
653 				      (int)qps, ratep->str, scale,
654 				      rate, new_rate);
655 			rate = new_rate;
656 			ratep->scaled = rate;
657 		}
658 	}
659 
660 	min = -rrl->window * rate;
661 
662 	/*
663 	 * Treat time jumps into the recent past as no time.
664 	 * Treat entries older than the window as if they were just created
665 	 * Credit other entries.
666 	 */
667 	age = get_age(rrl, e, now);
668 	if (age > 0) {
669 		/*
670 		 * Credit tokens earned during elapsed time.
671 		 */
672 		if (age > rrl->window) {
673 			e->responses = rate;
674 			e->slip_cnt = 0;
675 		} else {
676 			e->responses += rate*age;
677 			if (e->responses > rate) {
678 				e->responses = rate;
679 				e->slip_cnt = 0;
680 			}
681 		}
682 		/*
683 		 * Find the seconds since last log message without overflowing
684 		 * small counter.  This counter is reset when an entry is
685 		 * created.  It is not necessarily reset when some requests
686 		 * are answered provided other requests continue to be dropped
687 		 * or slipped.  This can happen when the request rate is just
688 		 * at the limit.
689 		 */
690 		if (e->logged) {
691 			log_secs = e->log_secs;
692 			log_secs += age;
693 			if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
694 				log_secs = DNS_RRL_MAX_LOG_SECS;
695 			e->log_secs = log_secs;
696 		}
697 	}
698 	set_age(rrl, e, now);
699 
700 	/*
701 	 * Debit the entry for this response.
702 	 */
703 	if (--e->responses >= 0) {
704 		if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
705 			debit_log(e, age, "");
706 		return (DNS_RRL_RESULT_OK);
707 	}
708 
709 	if (e->responses < min)
710 		e->responses = min;
711 
712 	/*
713 	 * Drop this response unless it should slip or leak.
714 	 */
715 	slip = rrl->slip.r;
716 	if (slip > 2 && scale < 1.0) {
717 		new_slip = (int) (slip * scale);
718 		if (new_slip < 2)
719 			new_slip = 2;
720 		if (rrl->slip.scaled != new_slip) {
721 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
722 				      DNS_LOGMODULE_REQUEST,
723 				      DNS_RRL_LOG_DEBUG1,
724 				      "%d qps scaled slip"
725 				      " by %.2f from %d to %d",
726 				      (int)qps, scale,
727 				      slip, new_slip);
728 			slip = new_slip;
729 			rrl->slip.scaled = slip;
730 		}
731 	}
732 	if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
733 		if (e->slip_cnt++ == 0) {
734 			if ((int) e->slip_cnt >= slip)
735 				e->slip_cnt = 0;
736 			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
737 				debit_log(e, age, "slip");
738 			return (DNS_RRL_RESULT_SLIP);
739 		} else if ((int) e->slip_cnt >= slip) {
740 			e->slip_cnt = 0;
741 		}
742 	}
743 
744 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
745 		debit_log(e, age, "drop");
746 	return (DNS_RRL_RESULT_DROP);
747 }
748 
749 static inline dns_rrl_qname_buf_t *
get_qname(dns_rrl_t * rrl,const dns_rrl_entry_t * e)750 get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
751 	dns_rrl_qname_buf_t *qbuf;
752 
753 	qbuf = rrl->qnames[e->log_qname];
754 	if (qbuf == NULL || qbuf->e != e)
755 		return (NULL);
756 	return (qbuf);
757 }
758 
759 static inline void
free_qname(dns_rrl_t * rrl,dns_rrl_entry_t * e)760 free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
761 	dns_rrl_qname_buf_t *qbuf;
762 
763 	qbuf = get_qname(rrl, e);
764 	if (qbuf != NULL) {
765 		qbuf->e = NULL;
766 		ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
767 	}
768 }
769 
770 static void
add_log_str(isc_buffer_t * lb,const char * str,unsigned int str_len)771 add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
772 	isc_region_t region;
773 
774 	isc_buffer_availableregion(lb, &region);
775 	if (str_len >= region.length) {
776 		if (region.length <= 0)
777 			return;
778 		str_len = region.length;
779 	}
780 	memmove(region.base, str, str_len);
781 	isc_buffer_add(lb, str_len);
782 }
783 
784 #define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1)
785 
/*
 * Build the text of a rate-limit log message in 'log_buf'.
 *
 * The message is assembled in this order: 'str1', 'str2', "drop " or
 * "slip " according to 'rrl_result', a word for the entry's response type,
 * "response(s) to ", the client network with its prefix length and, for
 * query, referral, NODATA and NXDOMAIN entries, the query name, class,
 * type and the query-name hash.  Anything that does not fit is truncated.
 *
 * When 'save_qname' is set and the entry has no saved name yet, an
 * absolute 'qname' is copied into a name buffer attached to the entry so
 * that a later message for the entry can still show it.
 *
 * 'resp_result' is only consulted for error entries.  The result is
 * always NUL-terminated when 'log_buf_len' is at least 1.
 */
static void
make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
	     const char *str1, const char *str2, isc_boolean_t plural,
	     dns_name_t *qname, isc_boolean_t save_qname,
	     dns_rrl_result_t rrl_result, isc_result_t resp_result,
	     char *log_buf, unsigned int log_buf_len)
{
	isc_buffer_t lb;
	dns_rrl_qname_buf_t *qbuf;
	isc_netaddr_t cidr;
	char strbuf[ISC_MAX(sizeof("/123"), sizeof("  (12345678)"))];
	const char *rstr;
	isc_result_t msg_result;

	/* No room for any text: emit an empty string if even that fits. */
	if (log_buf_len <= 1) {
		if (log_buf_len == 1)
			log_buf[0] = '\0';
		return;
	}
	/* Hold back one byte for the terminating '\0' added at the end. */
	isc_buffer_init(&lb, log_buf, log_buf_len-1);

	if (str1 != NULL)
		add_log_str(&lb, str1, strlen(str1));
	if (str2 != NULL)
		add_log_str(&lb, str2, strlen(str2));

	/* The action taken, if any. */
	switch (rrl_result) {
	case DNS_RRL_RESULT_OK:
		break;
	case DNS_RRL_RESULT_DROP:
		ADD_LOG_CSTR(&lb, "drop ");
		break;
	case DNS_RRL_RESULT_SLIP:
		ADD_LOG_CSTR(&lb, "slip ");
		break;
	default:
		INSIST(0);
		break;
	}

	/* The kind of response; plain query responses get no extra word. */
	switch (e->key.s.rtype) {
	case DNS_RRL_RTYPE_QUERY:
		break;
	case DNS_RRL_RTYPE_REFERRAL:
		ADD_LOG_CSTR(&lb, "referral ");
		break;
	case DNS_RRL_RTYPE_NODATA:
		ADD_LOG_CSTR(&lb, "NODATA ");
		break;
	case DNS_RRL_RTYPE_NXDOMAIN:
		ADD_LOG_CSTR(&lb, "NXDOMAIN ");
		break;
	case DNS_RRL_RTYPE_ERROR:
		if (resp_result == ISC_R_SUCCESS) {
			ADD_LOG_CSTR(&lb, "error ");
		} else {
			rstr = isc_result_totext(resp_result);
			add_log_str(&lb, rstr, strlen(rstr));
			ADD_LOG_CSTR(&lb, " error ");
		}
		break;
	case DNS_RRL_RTYPE_ALL:
		ADD_LOG_CSTR(&lb, "all ");
		break;
	default:
		INSIST(0);
	}

	if (plural)
		ADD_LOG_CSTR(&lb, "responses to ");
	else
		ADD_LOG_CSTR(&lb, "response to ");

	/*
	 * Rebuild the client network address from the masked address kept
	 * in the key and format the matching prefix length after it.
	 */
	memset(&cidr, 0, sizeof(cidr));
	if (e->key.s.ipv6) {
		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
		cidr.family = AF_INET6;
		memset(&cidr.type.in6, 0,  sizeof(cidr.type.in6));
		memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
	} else {
		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
		cidr.family = AF_INET;
		cidr.type.in.s_addr = e->key.s.ip[0];
	}
	msg_result = isc_netaddr_totext(&cidr, &lb);
	if (msg_result != ISC_R_SUCCESS)
		ADD_LOG_CSTR(&lb, "?");
	add_log_str(&lb, strbuf, strlen(strbuf));

	/* Only these response types are logged with a query name. */
	if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
	    e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
	    e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
	    e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
		qbuf = get_qname(rrl, e);
		if (save_qname && qbuf == NULL &&
		    qname != NULL && dns_name_isabsolute(qname)) {
			/*
			 * Capture the qname for the "stop limiting" message.
			 * Reuse a free name buffer if there is one;
			 * otherwise allocate another as long as fewer than
			 * DNS_RRL_QNAMES exist.
			 */
			qbuf = ISC_LIST_TAIL(rrl->qname_free);
			if (qbuf != NULL) {
				ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
			} else if (rrl->num_qnames < DNS_RRL_QNAMES) {
				qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
				if (qbuf != NULL) {
					memset(qbuf, 0, sizeof(*qbuf));
					ISC_LINK_INIT(qbuf, link);
					qbuf->index = rrl->num_qnames;
					rrl->qnames[rrl->num_qnames++] = qbuf;
				} else {
					isc_log_write(dns_lctx,
						      DNS_LOGCATEGORY_RRL,
						      DNS_LOGMODULE_REQUEST,
						      DNS_RRL_LOG_FAIL,
						      "isc_mem_get(%d)"
						      " failed for RRL qname",
						      (int)sizeof(*qbuf));
				}
			}
			if (qbuf != NULL) {
				/* Tie the buffer and the entry together. */
				e->log_qname = qbuf->index;
				qbuf->e = e;
				dns_fixedname_init(&qbuf->qname);
				dns_name_copy(qname,
					      dns_fixedname_name(&qbuf->qname),
					      NULL);
			}
		}
		/* A saved name takes precedence over the caller's qname. */
		if (qbuf != NULL)
			qname = dns_fixedname_name(&qbuf->qname);
		if (qname != NULL) {
			ADD_LOG_CSTR(&lb, " for ");
			(void)dns_name_totext(qname, ISC_TRUE, &lb);
		} else {
			ADD_LOG_CSTR(&lb, " for (?)");
		}
		/* No class or type is shown for NXDOMAIN entries. */
		if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
			ADD_LOG_CSTR(&lb, " ");
			(void)dns_rdataclass_totext(e->key.s.qclass, &lb);
			if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
				ADD_LOG_CSTR(&lb, " ");
				(void)dns_rdatatype_totext(e->key.s.qtype, &lb);
			}
		}
		snprintf(strbuf, sizeof(strbuf), "  (%08x)",
			 e->key.s.qname_hash);
		add_log_str(&lb, strbuf, strlen(strbuf));
	}

	/*
	 * We saved room for '\0'.
	 */
	log_buf[isc_buffer_usedlength(&lb)] = '\0';
}
943 
944 static void
log_end(dns_rrl_t * rrl,dns_rrl_entry_t * e,isc_boolean_t early,char * log_buf,unsigned int log_buf_len)945 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_boolean_t early,
946 	char *log_buf, unsigned int log_buf_len)
947 {
948 	if (e->logged) {
949 		make_log_buf(rrl, e,
950 			     early ? "*" : NULL,
951 			     rrl->log_only ? "would stop limiting "
952 					   : "stop limiting ",
953 			     ISC_TRUE, NULL, ISC_FALSE,
954 			     DNS_RRL_RESULT_OK, ISC_R_SUCCESS,
955 			     log_buf, log_buf_len);
956 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
957 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
958 			      "%s", log_buf);
959 		free_qname(rrl, e);
960 		e->logged = ISC_FALSE;
961 		--rrl->num_logged;
962 	}
963 }
964 
965 /*
966  * Log messages for streams that have stopped being rate limited.
967  */
968 static void
log_stops(dns_rrl_t * rrl,isc_stdtime_t now,int limit,char * log_buf,unsigned int log_buf_len)969 log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit,
970 	  char *log_buf, unsigned int log_buf_len)
971 {
972 	dns_rrl_entry_t *e;
973 	int age;
974 
975 	for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
976 		if (!e->logged)
977 			continue;
978 		if (now != 0) {
979 			age = get_age(rrl, e, now);
980 			if (age < DNS_RRL_STOP_LOG_SECS ||
981 			    response_balance(rrl, e, age) < 0)
982 				break;
983 		}
984 
985 		log_end(rrl, e, now == 0, log_buf, log_buf_len);
986 		if (rrl->num_logged <= 0)
987 			break;
988 
989 		/*
990 		 * Too many messages could stall real work.
991 		 */
992 		if (--limit < 0) {
993 			rrl->last_logged = ISC_LIST_PREV(e, lru);
994 			return;
995 		}
996 	}
997 	if (e == NULL) {
998 		INSIST(rrl->num_logged == 0);
999 		rrl->log_stops_time = now;
1000 	}
1001 	rrl->last_logged = e;
1002 }
1003 
1004 /*
1005  * Main rate limit interface.
1006  */
1007 dns_rrl_result_t
dns_rrl(dns_view_t * view,const isc_sockaddr_t * client_addr,isc_boolean_t is_tcp,dns_rdataclass_t qclass,dns_rdatatype_t qtype,dns_name_t * qname,isc_result_t resp_result,isc_stdtime_t now,isc_boolean_t wouldlog,char * log_buf,unsigned int log_buf_len)1008 dns_rrl(dns_view_t *view,
1009 	const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
1010 	dns_rdataclass_t qclass, dns_rdatatype_t qtype,
1011 	dns_name_t *qname, isc_result_t resp_result, isc_stdtime_t now,
1012 	isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
1013 {
1014 	dns_rrl_t *rrl;
1015 	dns_rrl_rtype_t rtype;
1016 	dns_rrl_entry_t *e;
1017 	isc_netaddr_t netclient;
1018 	int secs;
1019 	double qps, scale;
1020 	int exempt_match;
1021 	isc_result_t result;
1022 	dns_rrl_result_t rrl_result;
1023 
1024 	INSIST(log_buf != NULL && log_buf_len > 0);
1025 
1026 	rrl = view->rrl;
1027 	if (rrl->exempt != NULL) {
1028 		isc_netaddr_fromsockaddr(&netclient, client_addr);
1029 		result = dns_acl_match(&netclient, NULL, rrl->exempt,
1030 				       &view->aclenv, &exempt_match, NULL);
1031 		if (result == ISC_R_SUCCESS && exempt_match > 0)
1032 			return (DNS_RRL_RESULT_OK);
1033 	}
1034 
1035 	LOCK(&rrl->lock);
1036 
1037 	/*
1038 	 * Estimate total query per second rate when scaling by qps.
1039 	 */
1040 	if (rrl->qps_scale == 0) {
1041 		qps = 0.0;
1042 		scale = 1.0;
1043 	} else {
1044 		++rrl->qps_responses;
1045 		secs = delta_rrl_time(rrl->qps_time, now);
1046 		if (secs <= 0) {
1047 			qps = rrl->qps;
1048 		} else {
1049 			qps = (1.0*rrl->qps_responses) / secs;
1050 			if (secs >= rrl->window) {
1051 				if (isc_log_wouldlog(dns_lctx,
1052 						     DNS_RRL_LOG_DEBUG3))
1053 					isc_log_write(dns_lctx,
1054 						      DNS_LOGCATEGORY_RRL,
1055 						      DNS_LOGMODULE_REQUEST,
1056 						      DNS_RRL_LOG_DEBUG3,
1057 						      "%d responses/%d seconds"
1058 						      " = %d qps",
1059 						      rrl->qps_responses, secs,
1060 						      (int)qps);
1061 				rrl->qps = qps;
1062 				rrl->qps_responses = 0;
1063 				rrl->qps_time = now;
1064 			} else if (qps < rrl->qps) {
1065 				qps = rrl->qps;
1066 			}
1067 		}
1068 		scale = rrl->qps_scale / qps;
1069 	}
1070 
1071 	/*
1072 	 * Do maintenance once per second.
1073 	 */
1074 	if (rrl->num_logged > 0 && rrl->log_stops_time != now)
1075 		log_stops(rrl, now, 8, log_buf, log_buf_len);
1076 
1077 	/*
1078 	 * Notice TCP responses when scaling limits by qps.
1079 	 * Do not try to rate limit TCP responses.
1080 	 */
1081 	if (is_tcp) {
1082 		if (scale < 1.0) {
1083 			e = get_entry(rrl, client_addr,
1084 				      0, dns_rdatatype_none, NULL,
1085 				      DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
1086 				      log_buf, log_buf_len);
1087 			if (e != NULL) {
1088 				e->responses = -(rrl->window+1);
1089 				set_age(rrl, e, now);
1090 			}
1091 		}
1092 		UNLOCK(&rrl->lock);
1093 		return (ISC_R_SUCCESS);
1094 	}
1095 
1096 	/*
1097 	 * Find the right kind of entry, creating it if necessary.
1098 	 * If that is impossible, then nothing more can be done
1099 	 */
1100 	switch (resp_result) {
1101 	case ISC_R_SUCCESS:
1102 		rtype = DNS_RRL_RTYPE_QUERY;
1103 		break;
1104 	case DNS_R_DELEGATION:
1105 		rtype = DNS_RRL_RTYPE_REFERRAL;
1106 		break;
1107 	case DNS_R_NXRRSET:
1108 		rtype = DNS_RRL_RTYPE_NODATA;
1109 		break;
1110 	case DNS_R_NXDOMAIN:
1111 		rtype = DNS_RRL_RTYPE_NXDOMAIN;
1112 		break;
1113 	default:
1114 		rtype = DNS_RRL_RTYPE_ERROR;
1115 		break;
1116 	}
1117 	e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype,
1118 		      now, ISC_TRUE, log_buf, log_buf_len);
1119 	if (e == NULL) {
1120 		UNLOCK(&rrl->lock);
1121 		return (DNS_RRL_RESULT_OK);
1122 	}
1123 
1124 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1125 		/*
1126 		 * Do not worry about speed or releasing the lock.
1127 		 * This message appears before messages from debit_rrl_entry().
1128 		 */
1129 		make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
1130 			     qname, ISC_FALSE, DNS_RRL_RESULT_OK, resp_result,
1131 			     log_buf, log_buf_len);
1132 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1133 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
1134 			      "%s", log_buf);
1135 	}
1136 
1137 	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
1138 				     log_buf, log_buf_len);
1139 
1140 	if (rrl->all_per_second.r != 0) {
1141 		/*
1142 		 * We must debit the all-per-second token bucket if we have
1143 		 * an all-per-second limit for the IP address.
1144 		 * The all-per-second limit determines the log message
1145 		 * when both limits are hit.
1146 		 * The response limiting must continue if the
1147 		 * all-per-second limiting lapses.
1148 		 */
1149 		dns_rrl_entry_t *e_all;
1150 		dns_rrl_result_t rrl_all_result;
1151 
1152 		e_all = get_entry(rrl, client_addr,
1153 				  0, dns_rdatatype_none, NULL,
1154 				  DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
1155 				  log_buf, log_buf_len);
1156 		if (e_all == NULL) {
1157 			UNLOCK(&rrl->lock);
1158 			return (DNS_RRL_RESULT_OK);
1159 		}
1160 		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
1161 						 client_addr, now,
1162 						 log_buf, log_buf_len);
1163 		if (rrl_all_result != DNS_RRL_RESULT_OK) {
1164 			e = e_all;
1165 			rrl_result = rrl_all_result;
1166 			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1167 				make_log_buf(rrl, e,
1168 					     "prefer all-per-second limiting ",
1169 					     NULL, ISC_TRUE, qname, ISC_FALSE,
1170 					     DNS_RRL_RESULT_OK, resp_result,
1171 					     log_buf, log_buf_len);
1172 				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1173 					      DNS_LOGMODULE_REQUEST,
1174 					      DNS_RRL_LOG_DEBUG1,
1175 					      "%s", log_buf);
1176 			}
1177 		}
1178 	}
1179 
1180 	if (rrl_result == DNS_RRL_RESULT_OK) {
1181 		UNLOCK(&rrl->lock);
1182 		return (DNS_RRL_RESULT_OK);
1183 	}
1184 
1185 	/*
1186 	 * Log occassionally in the rate-limit category.
1187 	 */
1188 	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
1189 	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
1190 		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
1191 			     e->logged ? "continue limiting " : "limit ",
1192 			     ISC_TRUE, qname, ISC_TRUE,
1193 			     DNS_RRL_RESULT_OK, resp_result,
1194 			     log_buf, log_buf_len);
1195 		if (!e->logged) {
1196 			e->logged = ISC_TRUE;
1197 			if (++rrl->num_logged <= 1)
1198 				rrl->last_logged = e;
1199 		}
1200 		e->log_secs = 0;
1201 
1202 		/*
1203 		 * Avoid holding the lock.
1204 		 */
1205 		if (!wouldlog) {
1206 			UNLOCK(&rrl->lock);
1207 			e = NULL;
1208 		}
1209 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1210 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
1211 			      "%s", log_buf);
1212 	}
1213 
1214 	/*
1215 	 * Make a log message for the caller.
1216 	 */
1217 	if (wouldlog)
1218 		make_log_buf(rrl, e,
1219 			     rrl->log_only ? "would rate limit " : "rate limit ",
1220 			     NULL, ISC_FALSE, qname, ISC_FALSE,
1221 			     rrl_result, resp_result, log_buf, log_buf_len);
1222 
1223 	if (e != NULL) {
1224 		/*
1225 		 * Do not save the qname unless we might need it for
1226 		 * the ending log message.
1227 		 */
1228 		if (!e->logged)
1229 			free_qname(rrl, e);
1230 		UNLOCK(&rrl->lock);
1231 	}
1232 
1233 	return (rrl_result);
1234 }
1235 
1236 void
dns_rrl_view_destroy(dns_view_t * view)1237 dns_rrl_view_destroy(dns_view_t *view) {
1238 	dns_rrl_t *rrl;
1239 	dns_rrl_block_t *b;
1240 	dns_rrl_hash_t *h;
1241 	char log_buf[DNS_RRL_LOG_BUF_LEN];
1242 	int i;
1243 
1244 	rrl = view->rrl;
1245 	if (rrl == NULL)
1246 		return;
1247 	view->rrl = NULL;
1248 
1249 	/*
1250 	 * Assume the caller takes care of locking the view and anything else.
1251 	 */
1252 
1253 	if (rrl->num_logged > 0)
1254 		log_stops(rrl, 0, ISC_INT32_MAX, log_buf, sizeof(log_buf));
1255 
1256 	for (i = 0; i < DNS_RRL_QNAMES; ++i) {
1257 		if (rrl->qnames[i] == NULL)
1258 			break;
1259 		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
1260 	}
1261 
1262 	if (rrl->exempt != NULL)
1263 		dns_acl_detach(&rrl->exempt);
1264 
1265 	DESTROYLOCK(&rrl->lock);
1266 
1267 	while (!ISC_LIST_EMPTY(rrl->blocks)) {
1268 		b = ISC_LIST_HEAD(rrl->blocks);
1269 		ISC_LIST_UNLINK(rrl->blocks, b, link);
1270 		isc_mem_put(rrl->mctx, b, b->size);
1271 	}
1272 
1273 	h = rrl->hash;
1274 	if (h != NULL)
1275 		isc_mem_put(rrl->mctx, h,
1276 			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1277 
1278 	h = rrl->old_hash;
1279 	if (h != NULL)
1280 		isc_mem_put(rrl->mctx, h,
1281 			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1282 
1283 	isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1284 }
1285 
1286 isc_result_t
dns_rrl_init(dns_rrl_t ** rrlp,dns_view_t * view,int min_entries)1287 dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
1288 	dns_rrl_t *rrl;
1289 	isc_result_t result;
1290 
1291 	*rrlp = NULL;
1292 
1293 	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
1294 	if (rrl == NULL)
1295 		return (ISC_R_NOMEMORY);
1296 	memset(rrl, 0, sizeof(*rrl));
1297 	isc_mem_attach(view->mctx, &rrl->mctx);
1298 	result = isc_mutex_init(&rrl->lock);
1299 	if (result != ISC_R_SUCCESS) {
1300 		isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1301 		return (result);
1302 	}
1303 	isc_stdtime_get(&rrl->ts_bases[0]);
1304 
1305 	view->rrl = rrl;
1306 
1307 	result = expand_entries(rrl, min_entries);
1308 	if (result != ISC_R_SUCCESS) {
1309 		dns_rrl_view_destroy(view);
1310 		return (result);
1311 	}
1312 	result = expand_rrl_hash(rrl, 0);
1313 	if (result != ISC_R_SUCCESS) {
1314 		dns_rrl_view_destroy(view);
1315 		return (result);
1316 	}
1317 
1318 	*rrlp = rrl;
1319 	return (ISC_R_SUCCESS);
1320 }
1321