1 /*        $NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $     */
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  *
6  * This code is derived from software contributed to The NetBSD Foundation
7  * by Roy Marples.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: nd.c,v 1.7 2024/05/30 23:00:39 riastradh Exp $");
32 
33 #include <sys/callout.h>
34 #include <sys/mbuf.h>
35 #include <sys/socketvar.h> /* for softnet_lock */
36 
37 #include <net/if_llatbl.h>
38 #include <net/nd.h>
39 #include <net/route.h>
40 
41 #include <netinet/in.h>
42 #include <netinet/ip6.h>
43 
/* Registered neighbour-discovery domains, indexed by address family. */
static struct nd_domain *nd_domains[AF_MAX];

/* Garbage-collection timer, in seconds (one day). */
static int nd_gctimer = 24 * 60 * 60;

/* Helpers defined further down in this file. */
static struct nd_domain *nd_find_domain(int);
static void nd_set_timertick(struct llentry *, time_t);
50 
51 static void
nd_timer(void * arg)52 nd_timer(void *arg)
53 {
54           struct llentry *ln = arg;
55           struct nd_domain *nd;
56           struct ifnet *ifp = NULL;
57           struct psref psref;
58           struct mbuf *m = NULL;
59           bool send_ns = false;
60           int16_t missed = ND_LLINFO_NOSTATE;
61           union l3addr taddr, *daddrp = NULL;
62 
63           SOFTNET_KERNEL_LOCK_UNLESS_NET_MPSAFE();
64           LLE_WLOCK(ln);
65 
66           if (!(ln->la_flags & LLE_LINKED))
67                     goto out;
68           if (ln->ln_ntick > 0) {
69                     nd_set_timer(ln, ND_TIMER_TICK);
70                     goto out;
71           }
72 
73           nd = nd_find_domain(ln->lle_tbl->llt_af);
74           ifp = ln->lle_tbl->llt_ifp;
75           KASSERT(ifp != NULL);
76           if_acquire(ifp, &psref);
77 
78           memcpy(&taddr, &ln->r_l3addr, sizeof(taddr));
79 
80           switch (ln->ln_state) {
81           case ND_LLINFO_WAITDELETE:
82                     LLE_REMREF(ln);
83                     nd->nd_free(ln, 0);
84                     ln = NULL;
85                     break;
86 
87           case ND_LLINFO_INCOMPLETE:
88                     send_ns = true;
89                     if (ln->ln_asked++ < nd->nd_mmaxtries)
90                               break;
91 
92                     if (ln->ln_hold) {
93                               struct mbuf *m0, *mnxt;
94 
95                               /*
96                                * Assuming every packet in ln_hold
97                                * has the same IP header.
98                                */
99                               m = ln->ln_hold;
100                               for (m0 = m->m_nextpkt; m0 != NULL; m0 = mnxt) {
101                                         mnxt = m0->m_nextpkt;
102                                         m0->m_nextpkt = NULL;
103                                         m_freem(m0);
104                               }
105 
106                               m->m_nextpkt = NULL;
107                               ln->ln_hold = NULL;
108                               ln->la_numheld = 0;
109                     }
110 
111                     KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
112                         ln->la_numheld);
113 
114                     missed = ND_LLINFO_INCOMPLETE;
115                     ln->ln_state = ND_LLINFO_WAITDELETE;
116                     break;
117 
118           case ND_LLINFO_REACHABLE:
119                     if (!ND_IS_LLINFO_PERMANENT(ln)) {
120                               ln->ln_state = ND_LLINFO_STALE;
121                               nd_set_timer(ln, ND_TIMER_GC);
122                     }
123                     break;
124 
125           case ND_LLINFO_PURGE: /* FALLTHROUGH */
126           case ND_LLINFO_STALE:
127                     if (!ND_IS_LLINFO_PERMANENT(ln)) {
128                               LLE_REMREF(ln);
129                               nd->nd_free(ln, 1);
130                               ln = NULL;
131                     }
132                     break;
133 
134           case ND_LLINFO_DELAY:
135                     if (nd->nd_nud_enabled(ifp)) {
136                               ln->ln_asked = 1;
137                               ln->ln_state = ND_LLINFO_PROBE;
138                               send_ns = true;
139                               daddrp = &taddr;
140                     } else {
141                               ln->ln_state = ND_LLINFO_STALE;
142                               nd_set_timer(ln, ND_TIMER_GC);
143                     }
144                     break;
145 
146           case ND_LLINFO_PROBE:
147                     send_ns = true;
148                     if (ln->ln_asked++ < nd->nd_umaxtries) {
149                               daddrp = &taddr;
150                     } else {
151                               ln->ln_state = ND_LLINFO_UNREACHABLE;
152                               ln->ln_asked = 1;
153                               missed = ND_LLINFO_PROBE;
154                               /* nd_missed() consumers can use missed to know if
155                                * they need to send ICMP UNREACHABLE or not. */
156                     }
157                     break;
158           case ND_LLINFO_UNREACHABLE:
159                     /*
160                      * RFC 7048 Section 3 says in the UNREACHABLE state
161                      * packets continue to be sent to the link-layer address and
162                      * then backoff exponentially.
163                      * We adjust this slightly and move to the INCOMPLETE state
164                      * after nd_mmaxtries probes and then start backing off.
165                      *
166                      * This results in simpler code whilst providing a more robust
167                      * model which doubles the time to failure over what we did
168                      * before. We don't want to be back to the old ARP model where
169                      * no unreachability errors are returned because very
170                      * few applications would look at unreachability hints provided
171                      * such as ND_LLINFO_UNREACHABLE or RTM_MISS.
172                      */
173                     send_ns = true;
174                     if (ln->ln_asked++ < nd->nd_mmaxtries)
175                               break;
176 
177                     missed = ND_LLINFO_UNREACHABLE;
178                     ln->ln_state = ND_LLINFO_WAITDELETE;
179                     ln->la_flags &= ~LLE_VALID;
180                     break;
181           }
182 
183           if (send_ns) {
184                     uint8_t lladdr[255], *lladdrp;
185                     union l3addr src, *psrc;
186 
187                     if (ln->ln_state == ND_LLINFO_WAITDELETE)
188                               nd_set_timer(ln, ND_TIMER_RETRANS_BACKOFF);
189                     else
190                               nd_set_timer(ln, ND_TIMER_RETRANS);
191                     if (ln->ln_state > ND_LLINFO_INCOMPLETE &&
192                         ln->la_flags & LLE_VALID)
193                     {
194                               KASSERT(sizeof(lladdr) >= ifp->if_addrlen);
195                               memcpy(lladdr, &ln->ll_addr, ifp->if_addrlen);
196                               lladdrp = lladdr;
197                     } else
198                               lladdrp = NULL;
199                     psrc = nd->nd_holdsrc(ln, &src);
200                     LLE_FREE_LOCKED(ln);
201                     ln = NULL;
202                     nd->nd_output(ifp, daddrp, &taddr, lladdrp, psrc);
203           }
204 
205 out:
206           if (ln != NULL)
207                     LLE_FREE_LOCKED(ln);
208           SOFTNET_KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
209 
210           if (missed != ND_LLINFO_NOSTATE)
211                     nd->nd_missed(ifp, &taddr, missed, m);
212           if (ifp != NULL)
213                     if_release(ifp, &psref);
214 }
215 
216 static void
nd_set_timertick(struct llentry * ln,time_t xtick)217 nd_set_timertick(struct llentry *ln, time_t xtick)
218 {
219 
220           CTASSERT(sizeof(time_t) > sizeof(int));
221           KASSERT(xtick >= 0);
222 
223           /*
224            * We have to take care of a reference leak which occurs if
225            * callout_reset overwrites a pending callout schedule.  Unfortunately
226            * we don't have a mean to know the overwrite, so we need to know it
227            * using callout_stop.  We need to call callout_pending first to exclude
228            * the case that the callout has never been scheduled.
229            */
230           if (callout_pending(&ln->la_timer)) {
231                     bool expired;
232 
233                     expired = callout_stop(&ln->la_timer);
234                     if (!expired)
235                               LLE_REMREF(ln);
236           }
237 
238           ln->ln_expire = time_uptime + xtick / hz;
239           LLE_ADDREF(ln);
240           if (xtick > INT_MAX) {
241                     ln->ln_ntick = xtick - INT_MAX;
242                     xtick = INT_MAX;
243           } else {
244                     ln->ln_ntick = 0;
245           }
246           callout_reset(&ln->ln_timer_ch, xtick, nd_timer, ln);
247 }
248 
249 void
nd_set_timer(struct llentry * ln,int type)250 nd_set_timer(struct llentry *ln, int type)
251 {
252           time_t xtick;
253           struct ifnet *ifp;
254           struct nd_domain *nd;
255 
256           LLE_WLOCK_ASSERT(ln);
257 
258           ifp = ln->lle_tbl->llt_ifp;
259           nd = nd_find_domain(ln->lle_tbl->llt_af);
260 
261           switch (type) {
262           case ND_TIMER_IMMEDIATE:
263                     xtick = 0;
264                     break;
265           case ND_TIMER_TICK:
266                     xtick = ln->ln_ntick;
267                     break;
268           case ND_TIMER_RETRANS:
269                     xtick = nd->nd_retrans(ifp) * hz / 1000;
270                     break;
271           case ND_TIMER_RETRANS_BACKOFF:
272           {
273                     unsigned int retrans = nd->nd_retrans(ifp);
274                     unsigned int attempts = ln->ln_asked - nd->nd_mmaxtries;
275 
276                     xtick = retrans;
277                     while (attempts-- != 0) {
278                               xtick *= nd->nd_retransmultiple;
279                               if (xtick > nd->nd_maxretrans || xtick < retrans) {
280                                         xtick = nd->nd_maxretrans;
281                                         break;
282                               }
283                     }
284                     xtick = xtick * hz / 1000;
285                     break;
286           }
287           case ND_TIMER_REACHABLE:
288                     xtick = nd->nd_reachable(ifp) * hz / 1000;
289                     break;
290           case ND_TIMER_EXPIRE:
291                     if (ln->ln_expire > time_uptime)
292                               xtick = (ln->ln_expire - time_uptime) * hz;
293                     else
294                               xtick = nd_gctimer * hz;
295                     break;
296           case ND_TIMER_DELAY:
297                     xtick = nd->nd_delay * hz;
298                     break;
299           case ND_TIMER_GC:
300                     xtick = nd_gctimer * hz;
301                     break;
302           default:
303                     panic("%s: invalid timer type\n", __func__);
304           }
305 
306           nd_set_timertick(ln, xtick);
307 }
308 
309 int
nd_resolve(struct llentry * ln,const struct rtentry * rt,struct mbuf * m,uint8_t * lldst,size_t dstsize)310 nd_resolve(struct llentry *ln, const struct rtentry *rt, struct mbuf *m,
311     uint8_t *lldst, size_t dstsize)
312 {
313           struct ifnet *ifp;
314           struct nd_domain *nd;
315           int error;
316 
317           LLE_WLOCK_ASSERT(ln);
318 
319           ifp = ln->lle_tbl->llt_ifp;
320           nd = nd_find_domain(ln->lle_tbl->llt_af);
321 
322           /* We don't have to do link-layer address resolution on a p2p link. */
323           if (ifp->if_flags & IFF_POINTOPOINT &&
324               ln->ln_state < ND_LLINFO_REACHABLE)
325           {
326                     ln->ln_state = ND_LLINFO_STALE;
327                     nd_set_timer(ln, ND_TIMER_GC);
328           }
329 
330           /*
331            * The first time we send a packet to a neighbor whose entry is
332            * STALE, we have to change the state to DELAY and a sets a timer to
333            * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
334            * neighbor unreachability detection on expiration.
335            * (RFC 2461 7.3.3)
336            */
337           if (ln->ln_state == ND_LLINFO_STALE) {
338                     ln->ln_asked = 0;
339                     ln->ln_state = ND_LLINFO_DELAY;
340                     nd_set_timer(ln, ND_TIMER_DELAY);
341           }
342 
343           /*
344            * If the neighbor cache entry has a state other than INCOMPLETE
345            * (i.e. its link-layer address is already resolved), just
346            * send the packet.
347            */
348           if (ln->ln_state > ND_LLINFO_INCOMPLETE) {
349                     KASSERT((ln->la_flags & LLE_VALID) != 0);
350                     memcpy(lldst, &ln->ll_addr, MIN(dstsize, ifp->if_addrlen));
351                     LLE_WUNLOCK(ln);
352                     return 0;
353           }
354 
355           /*
356            * There is a neighbor cache entry, but no ethernet address
357            * response yet.  Append this latest packet to the end of the
358            * packet queue in the mbuf, unless the number of the packet
359            * does not exceed maxqueuelen.  When it exceeds maxqueuelen,
360            * the oldest packet in the queue will be removed.
361            */
362           if (ln->ln_state == ND_LLINFO_NOSTATE ||
363               ln->ln_state == ND_LLINFO_WAITDELETE)
364                     ln->ln_state = ND_LLINFO_INCOMPLETE;
365 
366 #ifdef MBUFTRACE
367           m_claimm(m, ln->lle_tbl->llt_mowner);
368 #endif
369           if (ln->ln_hold != NULL) {
370                     struct mbuf *m_hold;
371                     int i;
372 
373                     i = 0;
374                     for (m_hold = ln->ln_hold; m_hold; m_hold = m_hold->m_nextpkt) {
375                               i++;
376                               if (m_hold->m_nextpkt == NULL) {
377                                         m_hold->m_nextpkt = m;
378                                         break;
379                               }
380                     }
381                     KASSERTMSG(ln->la_numheld == i, "la_numheld=%d i=%d",
382                         ln->la_numheld, i);
383                     while (i >= nd->nd_maxqueuelen) {
384                               m_hold = ln->ln_hold;
385                               ln->ln_hold = ln->ln_hold->m_nextpkt;
386                               m_freem(m_hold);
387                               i--;
388                               ln->la_numheld--;
389                     }
390           } else {
391                     KASSERTMSG(ln->la_numheld == 0, "la_numheld=%d",
392                         ln->la_numheld);
393                     ln->ln_hold = m;
394           }
395 
396           KASSERTMSG(ln->la_numheld < nd->nd_maxqueuelen,
397               "la_numheld=%d nd_maxqueuelen=%d",
398               ln->la_numheld, nd->nd_maxqueuelen);
399           ln->la_numheld++;
400 
401           if (ln->ln_asked >= nd->nd_mmaxtries)
402                     error = (rt != NULL && rt->rt_flags & RTF_GATEWAY) ?
403                         EHOSTUNREACH : EHOSTDOWN;
404           else
405                     error = EWOULDBLOCK;
406 
407           /*
408            * If there has been no NS for the neighbor after entering the
409            * INCOMPLETE state, send the first solicitation.
410            */
411           if (!ND_IS_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
412                     struct psref psref;
413                     union l3addr dst, src, *psrc;
414 
415                     ln->ln_asked++;
416                     nd_set_timer(ln, ND_TIMER_RETRANS);
417                     memcpy(&dst, &ln->r_l3addr, sizeof(dst));
418                     psrc = nd->nd_holdsrc(ln, &src);
419                     if_acquire(ifp, &psref);
420                     LLE_WUNLOCK(ln);
421 
422                     nd->nd_output(ifp, NULL, &dst, NULL, psrc);
423                     if_release(ifp, &psref);
424           } else
425                     LLE_WUNLOCK(ln);
426 
427           return error;
428 }
429 
430 void
nd_nud_hint(struct llentry * ln)431 nd_nud_hint(struct llentry *ln)
432 {
433           struct nd_domain *nd;
434 
435           if (ln == NULL)
436                     return;
437 
438           LLE_WLOCK_ASSERT(ln);
439 
440           if (ln->ln_state < ND_LLINFO_REACHABLE)
441                     goto done;
442 
443           nd = nd_find_domain(ln->lle_tbl->llt_af);
444 
445           /*
446            * if we get upper-layer reachability confirmation many times,
447            * it is possible we have false information.
448            */
449           ln->ln_byhint++;
450           if (ln->ln_byhint > nd->nd_maxnudhint)
451                     goto done;
452 
453           ln->ln_state = ND_LLINFO_REACHABLE;
454           if (!ND_IS_LLINFO_PERMANENT(ln))
455                     nd_set_timer(ln, ND_TIMER_REACHABLE);
456 
457 done:
458           LLE_WUNLOCK(ln);
459 
460           return;
461 }
462 
463 static struct nd_domain *
nd_find_domain(int af)464 nd_find_domain(int af)
465 {
466 
467           KASSERT(af < __arraycount(nd_domains) && nd_domains[af] != NULL);
468           return nd_domains[af];
469 }
470 
471 void
nd_attach_domain(struct nd_domain * nd)472 nd_attach_domain(struct nd_domain *nd)
473 {
474 
475           KASSERT(nd->nd_family < __arraycount(nd_domains));
476           nd_domains[nd->nd_family] = nd;
477 }
478