1 /*        $NetBSD: ntp_monitor.c,v 1.7 2024/08/18 20:47:17 christos Exp $       */
2 
3 /*
4  * ntp_monitor - monitor ntpd statistics
5  */
6 #ifdef HAVE_CONFIG_H
7 # include <config.h>
8 #endif
9 
10 #include "ntpd.h"
11 #include "ntp_io.h"
12 #include "ntp_if.h"
13 #include "ntp_lists.h"
14 #include "ntp_stdlib.h"
15 #include <ntp_random.h>
16 
17 #include <stdio.h>
18 #include <signal.h>
19 #ifdef HAVE_SYS_IOCTL_H
20 # include <sys/ioctl.h>
21 #endif
22 
23 /*
24  * Record statistics based on source address, mode and version. The
25  * receive procedure calls us with the incoming rbufp before it does
26  * anything else. While at it, implement rate controls for inbound
27  * traffic.
28  *
29  * Each entry is doubly linked into two lists, a hash table and a most-
30  * recently-used (MRU) list. When a packet arrives it is looked up in
31  * the hash table. If found, the statistics are updated and the entry
32  * relinked at the head of the MRU list. If not found, a new entry is
33  * allocated, initialized and linked into both the hash table and at the
34  * head of the MRU list.
35  *
36  * Memory is usually allocated by grabbing a big chunk of new memory and
37  * cutting it up into littler pieces. The exception to this when we hit
38  * the memory limit. Then we free memory by grabbing entries off the
39  * tail for the MRU list, unlinking from the hash table, and
40  * reinitializing.
41  *
42  * INC_MONLIST is the default allocation granularity in entries.
43  * INIT_MONLIST is the default initial allocation in entries.
44  */
45 #ifdef MONMEMINC              /* old name */
46 # define  INC_MONLIST         MONMEMINC
47 #elif !defined(INC_MONLIST)
48 # define  INC_MONLIST         (4 * 1024 / sizeof(mon_entry))
49 #endif
50 #ifndef INIT_MONLIST
51 # define  INIT_MONLIST        (4 * 1024 / sizeof(mon_entry))
52 #endif
53 #ifndef MRU_MAXDEPTH_DEF
54 # define MRU_MAXDEPTH_DEF     (1024 * 1024 / sizeof(mon_entry))
55 #endif
56 
57 /*
58  * Hashing stuff
59  */
60 u_char    mon_hash_bits;
61 
62 /*
63  * Pointers to the hash table and the MRU list.  Memory for the hash
64  * table is allocated only if monitoring is enabled.
65  */
66 mon_entry **        mon_hash; /* MRU hash table */
67 mon_entry mon_mru_list;       /* mru listhead */
68 
69 /*
70  * List of free structures structures, and counters of in-use and total
71  * structures. The free structures are linked with the hash_next field.
72  */
73 static  mon_entry *mon_free;            /* free list or null if none */
74           u_int mru_alloc;              /* mru list + free list count */
75           u_int mru_entries;            /* mru list count */
76           u_int mru_peakentries;                  /* highest mru_entries seen */
77           u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
78           u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
79 static    u_int mon_mem_increments;     /* times called malloc() */
80 
81 /*
82  * Parameters of the RES_LIMITED restriction option. We define headway
83  * as the idle time between packets. A packet is discarded if the
84  * headway is less than the minimum, as well as if the average headway
85  * is less than eight times the increment.
86  */
87 int       ntp_minpkt = NTP_MINPKT;      /* minimum seconds between */
88                                                   /* requests from a client */
89 u_char    ntp_minpoll = NTP_MINPOLL;    /* minimum average log2 seconds */
90                                                   /* between client requests */
91 
92 /*
93  * Initialization state.  We may be monitoring, we may not.  If
94  * we aren't, we may not even have allocated any memory yet.
95  */
96           u_int     mon_enabled;                  /* enable switch */
97           u_int     mru_mindepth = 600; /* preempt above this */
98           int       mru_maxage = 64;    /* for entries older than */
99           u_int     mru_maxdepth =                /* MRU count hard limit */
100                               MRU_MAXDEPTH_DEF;
101           int       mon_age = 3000;               /* preemption limit */
102 
103 static    void                mon_getmoremem(void);
104 static    void                remove_from_hash(mon_entry *);
105 static    inline void         mon_free_entry(mon_entry *);
106 static    inline void         mon_reclaim_entry(mon_entry *);
107 
108 
109 /*
110  * init_mon - initialize monitoring global data
111  */
112 void
init_mon(void)113 init_mon(void)
114 {
115           /*
116            * Don't do much of anything here.  We don't allocate memory
117            * until mon_start().
118            */
119           mon_enabled = MON_OFF;
120           INIT_DLIST(mon_mru_list, mru);
121 }
122 
123 
124 /*
125  * remove_from_hash - removes an entry from the address hash table and
126  *                        decrements mru_entries.
127  */
128 static void
remove_from_hash(mon_entry * mon)129 remove_from_hash(
130           mon_entry *mon
131           )
132 {
133           u_int hash;
134           mon_entry *punlinked;
135 
136           mru_entries--;
137           hash = MON_HASH(&mon->rmtadr);
138           UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
139                          mon_entry);
140           ENSURE(punlinked == mon);
141 }
142 
143 
144 static inline void
mon_free_entry(mon_entry * m)145 mon_free_entry(
146           mon_entry *m
147           )
148 {
149           ZERO(*m);
150           LINK_SLIST(mon_free, m, hash_next);
151 }
152 
153 
154 /*
155  * mon_reclaim_entry - Remove an entry from the MRU list and from the
156  *                         hash array, then zero-initialize it.  Indirectly
157  *                         decrements mru_entries.
158 
159  * The entry is prepared to be reused.  Before return, in
160  * remove_from_hash(), mru_entries is decremented.  It is the caller's
161  * responsibility to increment it again.
162  */
163 static inline void
mon_reclaim_entry(mon_entry * m)164 mon_reclaim_entry(
165           mon_entry *m
166           )
167 {
168           DEBUG_INSIST(NULL != m);
169 
170           UNLINK_DLIST(m, mru);
171           remove_from_hash(m);
172           ZERO(*m);
173 }
174 
175 
176 /*
177  * mon_getmoremem - get more memory and put it on the free list
178  */
179 static void
mon_getmoremem(void)180 mon_getmoremem(void)
181 {
182           mon_entry *chunk;
183           u_int entries;
184 
185           entries = (0 == mon_mem_increments)
186                           ? mru_initalloc
187                           : mru_incalloc;
188 
189           if (entries) {
190                     chunk = eallocarray(entries, sizeof(*chunk));
191                     mru_alloc += entries;
192                     for (chunk += entries; entries; entries--)
193                               mon_free_entry(--chunk);
194 
195                     mon_mem_increments++;
196           }
197 }
198 
199 
200 /*
201  * mon_start - start up the monitoring software
202  */
203 void
mon_start(int mode)204 mon_start(
205           int mode
206           )
207 {
208           size_t octets;
209           u_int min_hash_slots;
210 
211           if (MON_OFF == mode)                    /* MON_OFF is 0 */
212                     return;
213           if (mon_enabled) {
214                     mon_enabled |= mode;
215                     return;
216           }
217           if (0 == mon_mem_increments)
218                     mon_getmoremem();
219           /*
220            * Select the MRU hash table size to limit the average count
221            * per bucket at capacity (mru_maxdepth) to 8, if possible
222            * given our hash is limited to 16 bits.
223            */
224           min_hash_slots = (mru_maxdepth / 8) + 1;
225           mon_hash_bits = 0;
226           while (min_hash_slots >>= 1)
227                     mon_hash_bits++;
228           mon_hash_bits = max(4, mon_hash_bits);
229           mon_hash_bits = min(16, mon_hash_bits);
230           octets = sizeof(*mon_hash) * MON_HASH_SIZE;
231           mon_hash = erealloc_zero(mon_hash, octets, 0);
232 
233           mon_enabled = mode;
234 }
235 
236 
237 /*
238  * mon_stop - stop the monitoring software
239  */
240 void
mon_stop(int mode)241 mon_stop(
242           int mode
243           )
244 {
245           mon_entry *mon;
246 
247           if (MON_OFF == mon_enabled)
248                     return;
249           if ((mon_enabled & mode) == 0 || mode == MON_OFF)
250                     return;
251 
252           mon_enabled &= ~mode;
253           if (mon_enabled != MON_OFF)
254                     return;
255 
256           /*
257            * Move everything on the MRU list to the free list quickly,
258            * without bothering to remove each from either the MRU list or
259            * the hash table.
260            */
261           ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
262                     mon_free_entry(mon);
263           ITER_DLIST_END()
264 
265           /* empty the MRU list and hash table. */
266           mru_entries = 0;
267           INIT_DLIST(mon_mru_list, mru);
268           zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
269 }
270 
271 
272 /*
273  * mon_clearinterface -- remove mru entries referring to a local address
274  *                             which is going away.
275  */
276 void
mon_clearinterface(endpt * lcladr)277 mon_clearinterface(
278           endpt *lcladr
279           )
280 {
281           mon_entry *mon;
282 
283           /* iterate mon over mon_mru_list */
284           ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
285                     if (mon->lcladr == lcladr) {
286                               /* remove from mru list */
287                               UNLINK_DLIST(mon, mru);
288                               /* remove from hash list, adjust mru_entries */
289                               remove_from_hash(mon);
290                               /* put on free list */
291                               mon_free_entry(mon);
292                     }
293           ITER_DLIST_END()
294 }
295 
296 
297 /*
298  * ntp_monitor - record stats about this packet
299  *
300  * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
301  * cleared unless the packet should not be responded to normally
302  * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
303  * The returned flags are saved in the MRU entry, so that it reflects
304  * whether the last packet from that source triggered rate limiting,
305  * and if so, possible KoD response.  This implies you can not tell
306  * whether a given address is eligible for rate limiting/KoD from the
307  * monlist restrict bits, only whether or not the last packet triggered
308  * such responses.  ntpdc -c reslist lets you see whether RES_LIMITED
309  * or RES_KOD is lit for a particular address before ntp_monitor()'s
310  * typical dousing.
311  */
312 u_short
ntp_monitor(struct recvbuf * rbufp,u_short flags)313 ntp_monitor(
314           struct recvbuf *rbufp,
315           u_short   flags
316           )
317 {
318           l_fp                interval_fp;
319           struct pkt *        pkt;
320           mon_entry *         mon;
321           mon_entry *         oldest;
322           int                 oldest_age;
323           u_int               hash;
324           u_short             restrict_mask;
325           u_char              mode;
326           u_char              version;
327           int                 interval;
328           int                 head;               /* headway increment */
329           int                 leak;               /* new headway */
330           int                 limit;              /* average threshold */
331 
332           REQUIRE(rbufp != NULL);
333 
334           if (mon_enabled == MON_OFF) {
335                     return ~(RES_LIMITED | RES_KOD) & flags;
336           }
337           pkt = &rbufp->recv_pkt;
338           hash = MON_HASH(&rbufp->recv_srcadr);
339           mode = PKT_MODE(pkt->li_vn_mode);
340           version = PKT_VERSION(pkt->li_vn_mode);
341           mon = mon_hash[hash];
342 
343           /*
344            * We keep track of all traffic for a given IP in one entry,
345            * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
346            */
347 
348           for (; mon != NULL; mon = mon->hash_next) {
349                     if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr)) {
350                               break;
351                     }
352           }
353           if (mon != NULL) {
354                     interval_fp = rbufp->recv_time;
355                     L_SUB(&interval_fp, &mon->last);
356                     /* add one-half second to round up */
357                     L_ADDUF(&interval_fp, 0x80000000);
358                     interval = interval_fp.l_i;
359                     mon->last = rbufp->recv_time;
360                     NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
361                     mon->count++;
362                     restrict_mask = flags;
363                     mon->vn_mode = VN_MODE(version, mode);
364 
365                     /* Shuffle to the head of the MRU list. */
366                     UNLINK_DLIST(mon, mru);
367                     LINK_DLIST(mon_mru_list, mon, mru);
368 
369                     /*
370                      * At this point the most recent arrival is first in the
371                      * MRU list.  Decrease the counter by the headway, but
372                      * not less than zero.
373                      */
374                     mon->leak -= interval;
375                     mon->leak = max(0, mon->leak);
376                     head = 1 << ntp_minpoll;
377                     leak = mon->leak + head;
378                     limit = NTP_SHIFT * head;
379 
380                     DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
381                                   interval, leak, limit));
382 
383                     /*
384                      * If the minimum and average thresholds are not
385                      * exceeded, douse the RES_LIMITED and RES_KOD bits and
386                      * increase the counter by the headway increment.  Note
387                      * that we give a 1-s grace for the minimum threshold
388                      * and a 2-s grace for the headway increment.  If one or
389                      * both thresholds are exceeded and the old counter is
390                      * less than the average threshold, set the counter to
391                      * the average threshold plus the increment and leave
392                      * the RES_LIMITED and RES_KOD bits lit. Otherwise,
393                      * leave the counter alone and douse the RES_KOD bit.
394                      * This rate-limits the KoDs to no more often than the
395                      * average headway.
396                      */
397                     if (interval + 1 >= ntp_minpkt && leak < limit) {
398                               mon->leak = leak - 2;
399                               restrict_mask &= ~(RES_LIMITED | RES_KOD);
400                     } else if (mon->leak < limit) {
401                               mon->leak = limit + head;
402                     } else {
403                               restrict_mask &= ~RES_KOD;
404                     }
405                     mon->flags = restrict_mask;
406 
407                     return mon->flags;
408           }
409 
410           /*
411            * If we got here, this is the first we've heard of this
412            * guy.  Get him some memory, either from the free list
413            * or from the tail of the MRU list.
414            *
415            * The following ntp.conf "mru" knobs come into play determining
416            * the depth (or count) of the MRU list:
417            * - mru_mindepth ("mru mindepth") is a floor beneath which
418            *   entries are kept without regard to their age.  The
419            *   default is 600 which matches the longtime implementation
420            *   limit on the total number of entries.
421            * - mru_maxage ("mru maxage") is a ceiling on the age in
422            *   seconds of entries.  Entries older than this are
423            *   reclaimed once mon_mindepth is exceeded.  64s default.
424            *   Note that entries older than this can easily survive
425            *   as they are reclaimed only as needed.
426            * - mru_maxdepth ("mru maxdepth") is a hard limit on the
427            *   number of entries.
428            * - "mru maxmem" sets mru_maxdepth to the number of entries
429            *   which fit in the given number of kilobytes.  The default is
430            *   1024, or 1 megabyte.
431            * - mru_initalloc ("mru initalloc" sets the count of the
432            *   initial allocation of MRU entries.
433            * - "mru initmem" sets mru_initalloc in units of kilobytes.
434            *   The default is 4.
435            * - mru_incalloc ("mru incalloc" sets the number of entries to
436            *   allocate on-demand each time the free list is empty.
437            * - "mru incmem" sets mru_incalloc in units of kilobytes.
438            *   The default is 4.
439            * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
440            * ntp.conf controls.  Similarly for "mru initalloc" and "mru
441            * initmem", and for "mru incalloc" and "mru incmem".
442            */
443           if (mru_entries < mru_mindepth) {
444                     if (NULL == mon_free)
445                               mon_getmoremem();
446                     UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
447           } else {
448                     oldest = TAIL_DLIST(mon_mru_list, mru);
449                     oldest_age = 0;               /* silence uninit warning */
450                     if (oldest != NULL) {
451                               interval_fp = rbufp->recv_time;
452                               L_SUB(&interval_fp, &oldest->last);
453                               /* add one-half second to round up */
454                               L_ADDUF(&interval_fp, 0x80000000);
455                               oldest_age = interval_fp.l_i;
456                     }
457                     /* note -1 is legal for mru_maxage (disables) */
458                     if (oldest != NULL && mru_maxage < oldest_age) {
459                               mon_reclaim_entry(oldest);
460                               mon = oldest;
461                     } else if (mon_free != NULL || mru_alloc <
462                                  mru_maxdepth) {
463                               if (NULL == mon_free)
464                                         mon_getmoremem();
465                               UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
466                     /* Preempt from the MRU list if old enough. */
467                     } else if (ntp_uurandom() >
468                                  (double)oldest_age / mon_age) {
469                               return ~(RES_LIMITED | RES_KOD) & flags;
470                     } else {
471                               mon_reclaim_entry(oldest);
472                               mon = oldest;
473                     }
474           }
475 
476           INSIST(mon != NULL);
477 
478           /*
479            * Got one, initialize it
480            */
481           mru_entries++;
482           mru_peakentries = max(mru_peakentries, mru_entries);
483           mon->last = rbufp->recv_time;
484           mon->first = mon->last;
485           mon->count = 1;
486           mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
487           mon->leak = 0;
488           memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
489           mon->vn_mode = VN_MODE(version, mode);
490           mon->lcladr = rbufp->dstadr;
491           mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
492               INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
493               : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);
494 
495           /*
496            * Drop him into front of the hash table. Also put him on top of
497            * the MRU list.
498            */
499           LINK_SLIST(mon_hash[hash], mon, hash_next);
500           LINK_DLIST(mon_mru_list, mon, mru);
501 
502           return mon->flags;
503 }
504 
505 
506