1 /* $NetBSD: ntp_monitor.c,v 1.7 2024/08/18 20:47:17 christos Exp $ */
2
3 /*
4 * ntp_monitor - monitor ntpd statistics
5 */
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9
10 #include "ntpd.h"
11 #include "ntp_io.h"
12 #include "ntp_if.h"
13 #include "ntp_lists.h"
14 #include "ntp_stdlib.h"
15 #include <ntp_random.h>
16
17 #include <stdio.h>
18 #include <signal.h>
19 #ifdef HAVE_SYS_IOCTL_H
20 # include <sys/ioctl.h>
21 #endif
22
23 /*
24 * Record statistics based on source address, mode and version. The
25 * receive procedure calls us with the incoming rbufp before it does
26 * anything else. While at it, implement rate controls for inbound
27 * traffic.
28 *
29 * Each entry is doubly linked into two lists, a hash table and a most-
30 * recently-used (MRU) list. When a packet arrives it is looked up in
31 * the hash table. If found, the statistics are updated and the entry
32 * relinked at the head of the MRU list. If not found, a new entry is
33 * allocated, initialized and linked into both the hash table and at the
34 * head of the MRU list.
35 *
36 * Memory is usually allocated by grabbing a big chunk of new memory and
37 * cutting it up into littler pieces. The exception to this when we hit
38 * the memory limit. Then we free memory by grabbing entries off the
39 * tail for the MRU list, unlinking from the hash table, and
40 * reinitializing.
41 *
42 * INC_MONLIST is the default allocation granularity in entries.
43 * INIT_MONLIST is the default initial allocation in entries.
44 */
45 #ifdef MONMEMINC /* old name */
46 # define INC_MONLIST MONMEMINC
47 #elif !defined(INC_MONLIST)
48 # define INC_MONLIST (4 * 1024 / sizeof(mon_entry))
49 #endif
50 #ifndef INIT_MONLIST
51 # define INIT_MONLIST (4 * 1024 / sizeof(mon_entry))
52 #endif
53 #ifndef MRU_MAXDEPTH_DEF
54 # define MRU_MAXDEPTH_DEF (1024 * 1024 / sizeof(mon_entry))
55 #endif
56
57 /*
58 * Hashing stuff
59 */
60 u_char mon_hash_bits;
61
62 /*
63 * Pointers to the hash table and the MRU list. Memory for the hash
64 * table is allocated only if monitoring is enabled.
65 */
66 mon_entry ** mon_hash; /* MRU hash table */
67 mon_entry mon_mru_list; /* mru listhead */
68
69 /*
70 * List of free structures structures, and counters of in-use and total
71 * structures. The free structures are linked with the hash_next field.
72 */
73 static mon_entry *mon_free; /* free list or null if none */
74 u_int mru_alloc; /* mru list + free list count */
75 u_int mru_entries; /* mru list count */
76 u_int mru_peakentries; /* highest mru_entries seen */
77 u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
78 u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
79 static u_int mon_mem_increments; /* times called malloc() */
80
81 /*
82 * Parameters of the RES_LIMITED restriction option. We define headway
83 * as the idle time between packets. A packet is discarded if the
84 * headway is less than the minimum, as well as if the average headway
85 * is less than eight times the increment.
86 */
87 int ntp_minpkt = NTP_MINPKT; /* minimum seconds between */
88 /* requests from a client */
89 u_char ntp_minpoll = NTP_MINPOLL; /* minimum average log2 seconds */
90 /* between client requests */
91
92 /*
93 * Initialization state. We may be monitoring, we may not. If
94 * we aren't, we may not even have allocated any memory yet.
95 */
96 u_int mon_enabled; /* enable switch */
97 u_int mru_mindepth = 600; /* preempt above this */
98 int mru_maxage = 64; /* for entries older than */
99 u_int mru_maxdepth = /* MRU count hard limit */
100 MRU_MAXDEPTH_DEF;
101 int mon_age = 3000; /* preemption limit */
102
103 static void mon_getmoremem(void);
104 static void remove_from_hash(mon_entry *);
105 static inline void mon_free_entry(mon_entry *);
106 static inline void mon_reclaim_entry(mon_entry *);
107
108
109 /*
110 * init_mon - initialize monitoring global data
111 */
112 void
init_mon(void)113 init_mon(void)
114 {
115 /*
116 * Don't do much of anything here. We don't allocate memory
117 * until mon_start().
118 */
119 mon_enabled = MON_OFF;
120 INIT_DLIST(mon_mru_list, mru);
121 }
122
123
124 /*
125 * remove_from_hash - removes an entry from the address hash table and
126 * decrements mru_entries.
127 */
128 static void
remove_from_hash(mon_entry * mon)129 remove_from_hash(
130 mon_entry *mon
131 )
132 {
133 u_int hash;
134 mon_entry *punlinked;
135
136 mru_entries--;
137 hash = MON_HASH(&mon->rmtadr);
138 UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
139 mon_entry);
140 ENSURE(punlinked == mon);
141 }
142
143
144 static inline void
mon_free_entry(mon_entry * m)145 mon_free_entry(
146 mon_entry *m
147 )
148 {
149 ZERO(*m);
150 LINK_SLIST(mon_free, m, hash_next);
151 }
152
153
154 /*
155 * mon_reclaim_entry - Remove an entry from the MRU list and from the
156 * hash array, then zero-initialize it. Indirectly
157 * decrements mru_entries.
158
159 * The entry is prepared to be reused. Before return, in
160 * remove_from_hash(), mru_entries is decremented. It is the caller's
161 * responsibility to increment it again.
162 */
163 static inline void
mon_reclaim_entry(mon_entry * m)164 mon_reclaim_entry(
165 mon_entry *m
166 )
167 {
168 DEBUG_INSIST(NULL != m);
169
170 UNLINK_DLIST(m, mru);
171 remove_from_hash(m);
172 ZERO(*m);
173 }
174
175
176 /*
177 * mon_getmoremem - get more memory and put it on the free list
178 */
179 static void
mon_getmoremem(void)180 mon_getmoremem(void)
181 {
182 mon_entry *chunk;
183 u_int entries;
184
185 entries = (0 == mon_mem_increments)
186 ? mru_initalloc
187 : mru_incalloc;
188
189 if (entries) {
190 chunk = eallocarray(entries, sizeof(*chunk));
191 mru_alloc += entries;
192 for (chunk += entries; entries; entries--)
193 mon_free_entry(--chunk);
194
195 mon_mem_increments++;
196 }
197 }
198
199
200 /*
201 * mon_start - start up the monitoring software
202 */
203 void
mon_start(int mode)204 mon_start(
205 int mode
206 )
207 {
208 size_t octets;
209 u_int min_hash_slots;
210
211 if (MON_OFF == mode) /* MON_OFF is 0 */
212 return;
213 if (mon_enabled) {
214 mon_enabled |= mode;
215 return;
216 }
217 if (0 == mon_mem_increments)
218 mon_getmoremem();
219 /*
220 * Select the MRU hash table size to limit the average count
221 * per bucket at capacity (mru_maxdepth) to 8, if possible
222 * given our hash is limited to 16 bits.
223 */
224 min_hash_slots = (mru_maxdepth / 8) + 1;
225 mon_hash_bits = 0;
226 while (min_hash_slots >>= 1)
227 mon_hash_bits++;
228 mon_hash_bits = max(4, mon_hash_bits);
229 mon_hash_bits = min(16, mon_hash_bits);
230 octets = sizeof(*mon_hash) * MON_HASH_SIZE;
231 mon_hash = erealloc_zero(mon_hash, octets, 0);
232
233 mon_enabled = mode;
234 }
235
236
237 /*
238 * mon_stop - stop the monitoring software
239 */
240 void
mon_stop(int mode)241 mon_stop(
242 int mode
243 )
244 {
245 mon_entry *mon;
246
247 if (MON_OFF == mon_enabled)
248 return;
249 if ((mon_enabled & mode) == 0 || mode == MON_OFF)
250 return;
251
252 mon_enabled &= ~mode;
253 if (mon_enabled != MON_OFF)
254 return;
255
256 /*
257 * Move everything on the MRU list to the free list quickly,
258 * without bothering to remove each from either the MRU list or
259 * the hash table.
260 */
261 ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
262 mon_free_entry(mon);
263 ITER_DLIST_END()
264
265 /* empty the MRU list and hash table. */
266 mru_entries = 0;
267 INIT_DLIST(mon_mru_list, mru);
268 zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
269 }
270
271
272 /*
273 * mon_clearinterface -- remove mru entries referring to a local address
274 * which is going away.
275 */
276 void
mon_clearinterface(endpt * lcladr)277 mon_clearinterface(
278 endpt *lcladr
279 )
280 {
281 mon_entry *mon;
282
283 /* iterate mon over mon_mru_list */
284 ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
285 if (mon->lcladr == lcladr) {
286 /* remove from mru list */
287 UNLINK_DLIST(mon, mru);
288 /* remove from hash list, adjust mru_entries */
289 remove_from_hash(mon);
290 /* put on free list */
291 mon_free_entry(mon);
292 }
293 ITER_DLIST_END()
294 }
295
296
297 /*
298 * ntp_monitor - record stats about this packet
299 *
300 * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
301 * cleared unless the packet should not be responded to normally
302 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
303 * The returned flags are saved in the MRU entry, so that it reflects
304 * whether the last packet from that source triggered rate limiting,
305 * and if so, possible KoD response. This implies you can not tell
306 * whether a given address is eligible for rate limiting/KoD from the
307 * monlist restrict bits, only whether or not the last packet triggered
308 * such responses. ntpdc -c reslist lets you see whether RES_LIMITED
309 * or RES_KOD is lit for a particular address before ntp_monitor()'s
310 * typical dousing.
311 */
312 u_short
ntp_monitor(struct recvbuf * rbufp,u_short flags)313 ntp_monitor(
314 struct recvbuf *rbufp,
315 u_short flags
316 )
317 {
318 l_fp interval_fp;
319 struct pkt * pkt;
320 mon_entry * mon;
321 mon_entry * oldest;
322 int oldest_age;
323 u_int hash;
324 u_short restrict_mask;
325 u_char mode;
326 u_char version;
327 int interval;
328 int head; /* headway increment */
329 int leak; /* new headway */
330 int limit; /* average threshold */
331
332 REQUIRE(rbufp != NULL);
333
334 if (mon_enabled == MON_OFF) {
335 return ~(RES_LIMITED | RES_KOD) & flags;
336 }
337 pkt = &rbufp->recv_pkt;
338 hash = MON_HASH(&rbufp->recv_srcadr);
339 mode = PKT_MODE(pkt->li_vn_mode);
340 version = PKT_VERSION(pkt->li_vn_mode);
341 mon = mon_hash[hash];
342
343 /*
344 * We keep track of all traffic for a given IP in one entry,
345 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
346 */
347
348 for (; mon != NULL; mon = mon->hash_next) {
349 if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) {
350 break;
351 }
352 }
353 if (mon != NULL) {
354 interval_fp = rbufp->recv_time;
355 L_SUB(&interval_fp, &mon->last);
356 /* add one-half second to round up */
357 L_ADDUF(&interval_fp, 0x80000000);
358 interval = interval_fp.l_i;
359 mon->last = rbufp->recv_time;
360 NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
361 mon->count++;
362 restrict_mask = flags;
363 mon->vn_mode = VN_MODE(version, mode);
364
365 /* Shuffle to the head of the MRU list. */
366 UNLINK_DLIST(mon, mru);
367 LINK_DLIST(mon_mru_list, mon, mru);
368
369 /*
370 * At this point the most recent arrival is first in the
371 * MRU list. Decrease the counter by the headway, but
372 * not less than zero.
373 */
374 mon->leak -= interval;
375 mon->leak = max(0, mon->leak);
376 head = 1 << ntp_minpoll;
377 leak = mon->leak + head;
378 limit = NTP_SHIFT * head;
379
380 DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
381 interval, leak, limit));
382
383 /*
384 * If the minimum and average thresholds are not
385 * exceeded, douse the RES_LIMITED and RES_KOD bits and
386 * increase the counter by the headway increment. Note
387 * that we give a 1-s grace for the minimum threshold
388 * and a 2-s grace for the headway increment. If one or
389 * both thresholds are exceeded and the old counter is
390 * less than the average threshold, set the counter to
391 * the average threshold plus the increment and leave
392 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
393 * leave the counter alone and douse the RES_KOD bit.
394 * This rate-limits the KoDs to no more often than the
395 * average headway.
396 */
397 if (interval + 1 >= ntp_minpkt && leak < limit) {
398 mon->leak = leak - 2;
399 restrict_mask &= ~(RES_LIMITED | RES_KOD);
400 } else if (mon->leak < limit) {
401 mon->leak = limit + head;
402 } else {
403 restrict_mask &= ~RES_KOD;
404 }
405 mon->flags = restrict_mask;
406
407 return mon->flags;
408 }
409
410 /*
411 * If we got here, this is the first we've heard of this
412 * guy. Get him some memory, either from the free list
413 * or from the tail of the MRU list.
414 *
415 * The following ntp.conf "mru" knobs come into play determining
416 * the depth (or count) of the MRU list:
417 * - mru_mindepth ("mru mindepth") is a floor beneath which
418 * entries are kept without regard to their age. The
419 * default is 600 which matches the longtime implementation
420 * limit on the total number of entries.
421 * - mru_maxage ("mru maxage") is a ceiling on the age in
422 * seconds of entries. Entries older than this are
423 * reclaimed once mon_mindepth is exceeded. 64s default.
424 * Note that entries older than this can easily survive
425 * as they are reclaimed only as needed.
426 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
427 * number of entries.
428 * - "mru maxmem" sets mru_maxdepth to the number of entries
429 * which fit in the given number of kilobytes. The default is
430 * 1024, or 1 megabyte.
431 * - mru_initalloc ("mru initalloc" sets the count of the
432 * initial allocation of MRU entries.
433 * - "mru initmem" sets mru_initalloc in units of kilobytes.
434 * The default is 4.
435 * - mru_incalloc ("mru incalloc" sets the number of entries to
436 * allocate on-demand each time the free list is empty.
437 * - "mru incmem" sets mru_incalloc in units of kilobytes.
438 * The default is 4.
439 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
440 * ntp.conf controls. Similarly for "mru initalloc" and "mru
441 * initmem", and for "mru incalloc" and "mru incmem".
442 */
443 if (mru_entries < mru_mindepth) {
444 if (NULL == mon_free)
445 mon_getmoremem();
446 UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
447 } else {
448 oldest = TAIL_DLIST(mon_mru_list, mru);
449 oldest_age = 0; /* silence uninit warning */
450 if (oldest != NULL) {
451 interval_fp = rbufp->recv_time;
452 L_SUB(&interval_fp, &oldest->last);
453 /* add one-half second to round up */
454 L_ADDUF(&interval_fp, 0x80000000);
455 oldest_age = interval_fp.l_i;
456 }
457 /* note -1 is legal for mru_maxage (disables) */
458 if (oldest != NULL && mru_maxage < oldest_age) {
459 mon_reclaim_entry(oldest);
460 mon = oldest;
461 } else if (mon_free != NULL || mru_alloc <
462 mru_maxdepth) {
463 if (NULL == mon_free)
464 mon_getmoremem();
465 UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
466 /* Preempt from the MRU list if old enough. */
467 } else if (ntp_uurandom() >
468 (double)oldest_age / mon_age) {
469 return ~(RES_LIMITED | RES_KOD) & flags;
470 } else {
471 mon_reclaim_entry(oldest);
472 mon = oldest;
473 }
474 }
475
476 INSIST(mon != NULL);
477
478 /*
479 * Got one, initialize it
480 */
481 mru_entries++;
482 mru_peakentries = max(mru_peakentries, mru_entries);
483 mon->last = rbufp->recv_time;
484 mon->first = mon->last;
485 mon->count = 1;
486 mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
487 mon->leak = 0;
488 memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
489 mon->vn_mode = VN_MODE(version, mode);
490 mon->lcladr = rbufp->dstadr;
491 mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
492 INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
493 : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);
494
495 /*
496 * Drop him into front of the hash table. Also put him on top of
497 * the MRU list.
498 */
499 LINK_SLIST(mon_hash[hash], mon, hash_next);
500 LINK_DLIST(mon_mru_list, mon, mru);
501
502 return mon->flags;
503 }
504
505
506