1 /*        $NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $   */
2 /*        $KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $       */
3 
4 /*
5  * Copyright (C) 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1992, 1993
35  *        The Regents of the University of California.  All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * Stephen Deering of Stanford University.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *        @(#)igmp.c          8.1 (Berkeley) 7/19/93
65  */
66 
67 /*
68  * Copyright (c) 1988 Stephen Deering.
69  *
70  * This code is derived from software contributed to Berkeley by
71  * Stephen Deering of Stanford University.
72  *
73  * Redistribution and use in source and binary forms, with or without
74  * modification, are permitted provided that the following conditions
75  * are met:
76  * 1. Redistributions of source code must retain the above copyright
77  *    notice, this list of conditions and the following disclaimer.
78  * 2. Redistributions in binary form must reproduce the above copyright
79  *    notice, this list of conditions and the following disclaimer in the
80  *    documentation and/or other materials provided with the distribution.
81  * 3. All advertising materials mentioning features or use of this software
82  *    must display the following acknowledgement:
83  *        This product includes software developed by the University of
84  *        California, Berkeley and its contributors.
85  * 4. Neither the name of the University nor the names of its contributors
86  *    may be used to endorse or promote products derived from this software
87  *    without specific prior written permission.
88  *
89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  *
101  *        @(#)igmp.c          8.1 (Berkeley) 7/19/93
102  */
103 
104 #include <sys/cdefs.h>
105 __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.101 2019/09/25 09:53:38 ozaki-r Exp $");
106 
107 #ifdef _KERNEL_OPT
108 #include "opt_inet.h"
109 #include "opt_net_mpsafe.h"
110 #endif
111 
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/mbuf.h>
115 #include <sys/socket.h>
116 #include <sys/socketvar.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119 #include <sys/kernel.h>
120 #include <sys/callout.h>
121 #include <sys/cprng.h>
122 #include <sys/rwlock.h>
123 
124 #include <net/if.h>
125 
126 #include <netinet/in.h>
127 #include <netinet/in_var.h>
128 #include <netinet6/in6_var.h>
129 #include <netinet/ip6.h>
130 #include <netinet6/ip6_var.h>
131 #include <netinet6/scope6_var.h>
132 #include <netinet/icmp6.h>
133 #include <netinet6/icmp6_private.h>
134 #include <netinet6/mld6_var.h>
135 
136 static krwlock_t    in6_multilock __cacheline_aligned;
137 
138 /*
139  * Protocol constants
140  */
141 
142 /*
143  * time between repetitions of a node's initial report of interest in a
144  * multicast address(in seconds)
145  */
146 #define MLD_UNSOLICITED_REPORT_INTERVAL 10
147 
148 static struct ip6_pktopts ip6_opts;
149 
150 static void mld_start_listening(struct in6_multi *);
151 static void mld_stop_listening(struct in6_multi *);
152 
153 static struct mld_hdr *mld_allocbuf(struct mbuf **, struct in6_multi *, int);
154 static void mld_sendpkt(struct in6_multi *, int, const struct in6_addr *);
155 static void mld_starttimer(struct in6_multi *);
156 static void mld_stoptimer(struct in6_multi *);
157 static u_long mld_timerresid(struct in6_multi *);
158 
159 static void in6m_ref(struct in6_multi *);
160 static void in6m_unref(struct in6_multi *);
161 static void in6m_destroy(struct in6_multi *);
162 
163 void
mld_init(void)164 mld_init(void)
165 {
166           static u_int8_t hbh_buf[8];
167           struct ip6_hbh *hbh = (struct ip6_hbh *)hbh_buf;
168           u_int16_t rtalert_code = htons((u_int16_t)IP6OPT_RTALERT_MLD);
169 
170           /* ip6h_nxt will be fill in later */
171           hbh->ip6h_len = 0;  /* (8 >> 3) - 1 */
172 
173           /* XXX: grotty hard coding... */
174           hbh_buf[2] = IP6OPT_PADN;     /* 2 byte padding */
175           hbh_buf[3] = 0;
176           hbh_buf[4] = IP6OPT_RTALERT;
177           hbh_buf[5] = IP6OPT_RTALERT_LEN - 2;
178           memcpy(&hbh_buf[6], (void *)&rtalert_code, sizeof(u_int16_t));
179 
180           ip6_opts.ip6po_hbh = hbh;
181           /* We will specify the hoplimit by a multicast option. */
182           ip6_opts.ip6po_hlim = -1;
183           ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
184 
185           rw_init(&in6_multilock);
186 }
187 
188 static void
mld_starttimer(struct in6_multi * in6m)189 mld_starttimer(struct in6_multi *in6m)
190 {
191           struct timeval now;
192 
193           KASSERT(rw_write_held(&in6_multilock));
194           KASSERTMSG(in6m->in6m_timer != IN6M_TIMER_UNDEF,
195               "in6m_timer=%d", in6m->in6m_timer);
196 
197           microtime(&now);
198           in6m->in6m_timer_expire.tv_sec = now.tv_sec + in6m->in6m_timer / hz;
199           in6m->in6m_timer_expire.tv_usec = now.tv_usec +
200               (in6m->in6m_timer % hz) * (1000000 / hz);
201           if (in6m->in6m_timer_expire.tv_usec > 1000000) {
202                     in6m->in6m_timer_expire.tv_sec++;
203                     in6m->in6m_timer_expire.tv_usec -= 1000000;
204           }
205 
206           /* start or restart the timer */
207           callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
208 }
209 
210 /*
211  * mld_stoptimer releases in6_multilock when calling callout_halt.
212  * The caller must ensure in6m won't be freed while releasing the lock.
213  */
214 static void
mld_stoptimer(struct in6_multi * in6m)215 mld_stoptimer(struct in6_multi *in6m)
216 {
217 
218           KASSERT(rw_write_held(&in6_multilock));
219 
220           if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
221                     return;
222 
223           rw_exit(&in6_multilock);
224 
225           callout_halt(&in6m->in6m_timer_ch, NULL);
226 
227           rw_enter(&in6_multilock, RW_WRITER);
228 
229           in6m->in6m_timer = IN6M_TIMER_UNDEF;
230 }
231 
232 static void
mld_timeo(void * arg)233 mld_timeo(void *arg)
234 {
235           struct in6_multi *in6m = arg;
236 
237           KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
238               in6m->in6m_refcount);
239 
240           KERNEL_LOCK_UNLESS_NET_MPSAFE();
241           rw_enter(&in6_multilock, RW_WRITER);
242           if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
243                     goto out;
244 
245           in6m->in6m_timer = IN6M_TIMER_UNDEF;
246 
247           switch (in6m->in6m_state) {
248           case MLD_REPORTPENDING:
249                     mld_start_listening(in6m);
250                     break;
251           default:
252                     mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
253                     break;
254           }
255 
256 out:
257           rw_exit(&in6_multilock);
258           KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
259 }
260 
261 static u_long
mld_timerresid(struct in6_multi * in6m)262 mld_timerresid(struct in6_multi *in6m)
263 {
264           struct timeval now, diff;
265 
266           microtime(&now);
267 
268           if (now.tv_sec > in6m->in6m_timer_expire.tv_sec ||
269               (now.tv_sec == in6m->in6m_timer_expire.tv_sec &&
270               now.tv_usec > in6m->in6m_timer_expire.tv_usec)) {
271                     return (0);
272           }
273           diff = in6m->in6m_timer_expire;
274           diff.tv_sec -= now.tv_sec;
275           diff.tv_usec -= now.tv_usec;
276           if (diff.tv_usec < 0) {
277                     diff.tv_sec--;
278                     diff.tv_usec += 1000000;
279           }
280 
281           /* return the remaining time in milliseconds */
282           return diff.tv_sec * 1000 + diff.tv_usec / 1000;
283 }
284 
285 static void
mld_start_listening(struct in6_multi * in6m)286 mld_start_listening(struct in6_multi *in6m)
287 {
288           struct in6_addr all_in6;
289 
290           KASSERT(rw_write_held(&in6_multilock));
291 
292           /*
293            * RFC2710 page 10:
294            * The node never sends a Report or Done for the link-scope all-nodes
295            * address.
296            * MLD messages are never sent for multicast addresses whose scope is 0
297            * (reserved) or 1 (node-local).
298            */
299           all_in6 = in6addr_linklocal_allnodes;
300           if (in6_setscope(&all_in6, in6m->in6m_ifp, NULL)) {
301                     /* XXX: this should not happen! */
302                     in6m->in6m_timer = 0;
303                     in6m->in6m_state = MLD_OTHERLISTENER;
304           }
305           if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
306               IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) < IPV6_ADDR_SCOPE_LINKLOCAL) {
307                     in6m->in6m_timer = IN6M_TIMER_UNDEF;
308                     in6m->in6m_state = MLD_OTHERLISTENER;
309           } else {
310                     mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
311                     in6m->in6m_timer = cprng_fast32() %
312                         (MLD_UNSOLICITED_REPORT_INTERVAL * hz);
313                     in6m->in6m_state = MLD_IREPORTEDLAST;
314 
315                     mld_starttimer(in6m);
316           }
317 }
318 
319 static void
mld_stop_listening(struct in6_multi * in6m)320 mld_stop_listening(struct in6_multi *in6m)
321 {
322           struct in6_addr allnode, allrouter;
323 
324           KASSERT(rw_lock_held(&in6_multilock));
325 
326           allnode = in6addr_linklocal_allnodes;
327           if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
328                     /* XXX: this should not happen! */
329                     return;
330           }
331           allrouter = in6addr_linklocal_allrouters;
332           if (in6_setscope(&allrouter, in6m->in6m_ifp, NULL)) {
333                     /* XXX impossible */
334                     return;
335           }
336 
337           if (in6m->in6m_state == MLD_IREPORTEDLAST &&
338               (!IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &allnode)) &&
339               IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) >
340               IPV6_ADDR_SCOPE_INTFACELOCAL) {
341                     mld_sendpkt(in6m, MLD_LISTENER_DONE, &allrouter);
342           }
343 }
344 
345 void
mld_input(struct mbuf * m,int off)346 mld_input(struct mbuf *m, int off)
347 {
348           struct ip6_hdr *ip6;
349           struct mld_hdr *mldh;
350           struct ifnet *ifp;
351           struct in6_multi *in6m = NULL;
352           struct in6_addr mld_addr, all_in6;
353           u_long timer = 0;   /* timer value in the MLD query header */
354           struct psref psref;
355 
356           ifp = m_get_rcvif_psref(m, &psref);
357           if (__predict_false(ifp == NULL))
358                     goto out;
359           IP6_EXTHDR_GET(mldh, struct mld_hdr *, m, off, sizeof(*mldh));
360           if (mldh == NULL) {
361                     ICMP6_STATINC(ICMP6_STAT_TOOSHORT);
362                     goto out_nodrop;
363           }
364 
365           ip6 = mtod(m, struct ip6_hdr *);
366 
367           /* source address validation */
368           if (!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) {
369                     /*
370                      * RFC3590 allows the IPv6 unspecified address as the source
371                      * address of MLD report and done messages.  However, as this
372                      * same document says, this special rule is for snooping
373                      * switches and the RFC requires routers to discard MLD packets
374                      * with the unspecified source address.  The RFC only talks
375                      * about hosts receiving an MLD query or report in Security
376                      * Considerations, but this is probably the correct intention.
377                      * RFC3590 does not talk about other cases than link-local and
378                      * the unspecified source addresses, but we believe the same
379                      * rule should be applied.
380                      * As a result, we only allow link-local addresses as the
381                      * source address; otherwise, simply discard the packet.
382                      */
383 #if 0
384                     /*
385                      * XXX: do not log in an input path to avoid log flooding,
386                      * though RFC3590 says "SHOULD log" if the source of a query
387                      * is the unspecified address.
388                      */
389                     char ip6bufs[INET6_ADDRSTRLEN];
390                     char ip6bufm[INET6_ADDRSTRLEN];
391                     log(LOG_INFO,
392                         "mld_input: src %s is not link-local (grp=%s)\n",
393                         IN6_PRINT(ip6bufs,&ip6->ip6_src),
394                         IN6_PRINT(ip6bufm, &mldh->mld_addr));
395 #endif
396                     goto out;
397           }
398 
399           /*
400            * make a copy for local work (in6_setscope() may modify the 1st arg)
401            */
402           mld_addr = mldh->mld_addr;
403           if (in6_setscope(&mld_addr, ifp, NULL)) {
404                     /* XXX: this should not happen! */
405                     goto out;
406           }
407 
408           /*
409            * In the MLD specification, there are 3 states and a flag.
410            *
411            * In Non-Listener state, we simply don't have a membership record.
412            * In Delaying Listener state, our timer is running (in6m->in6m_timer)
413            * In Idle Listener state, our timer is not running
414            * (in6m->in6m_timer==IN6M_TIMER_UNDEF)
415            *
416            * The flag is in6m->in6m_state, it is set to MLD_OTHERLISTENER if
417            * we have heard a report from another member, or MLD_IREPORTEDLAST
418            * if we sent the last report.
419            */
420           switch (mldh->mld_type) {
421           case MLD_LISTENER_QUERY: {
422                     struct in6_multi *next;
423 
424                     if (ifp->if_flags & IFF_LOOPBACK)
425                               break;
426 
427                     if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
428                         !IN6_IS_ADDR_MULTICAST(&mld_addr))
429                               break;    /* print error or log stat? */
430 
431                     all_in6 = in6addr_linklocal_allnodes;
432                     if (in6_setscope(&all_in6, ifp, NULL)) {
433                               /* XXX: this should not happen! */
434                               break;
435                     }
436 
437                     /*
438                      * - Start the timers in all of our membership records
439                      *   that the query applies to for the interface on
440                      *   which the query arrived excl. those that belong
441                      *   to the "all-nodes" group (ff02::1).
442                      * - Restart any timer that is already running but has
443                      *   a value longer than the requested timeout.
444                      * - Use the value specified in the query message as
445                      *   the maximum timeout.
446                      */
447                     timer = ntohs(mldh->mld_maxdelay);
448 
449                     rw_enter(&in6_multilock, RW_WRITER);
450                     /*
451                      * mld_stoptimer and mld_sendpkt release in6_multilock
452                      * temporarily, so we have to prevent in6m from being freed
453                      * while releasing the lock by having an extra reference to it.
454                      *
455                      * Also in6_purge_multi might remove items from the list of the
456                      * ifp while releasing the lock. Fortunately in6_purge_multi is
457                      * never executed as long as we have a psref of the ifp.
458                      */
459                     LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
460                               if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
461                                   IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
462                                   IPV6_ADDR_SCOPE_LINKLOCAL)
463                                         continue;
464 
465                               if (in6m->in6m_state == MLD_REPORTPENDING)
466                                         continue; /* we are not yet ready */
467 
468                               if (!IN6_IS_ADDR_UNSPECIFIED(&mld_addr) &&
469                                   !IN6_ARE_ADDR_EQUAL(&mld_addr, &in6m->in6m_addr))
470                                         continue;
471 
472                               if (timer == 0) {
473                                         in6m_ref(in6m);
474 
475                                         /* send a report immediately */
476                                         mld_stoptimer(in6m);
477                                         mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
478                                         in6m->in6m_state = MLD_IREPORTEDLAST;
479 
480                                         in6m_unref(in6m); /* May free in6m */
481                               } else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
482                                   mld_timerresid(in6m) > timer) {
483                                         in6m->in6m_timer =
484                                            1 + (cprng_fast32() % timer) * hz / 1000;
485                                         mld_starttimer(in6m);
486                               }
487                     }
488                     rw_exit(&in6_multilock);
489                     break;
490               }
491 
492           case MLD_LISTENER_REPORT:
493                     /*
494                      * For fast leave to work, we have to know that we are the
495                      * last person to send a report for this group.  Reports
496                      * can potentially get looped back if we are a multicast
497                      * router, so discard reports sourced by me.
498                      * Note that it is impossible to check IFF_LOOPBACK flag of
499                      * ifp for this purpose, since ip6_mloopback pass the physical
500                      * interface to looutput.
501                      */
502                     if (m->m_flags & M_LOOP) /* XXX: grotty flag, but efficient */
503                               break;
504 
505                     if (!IN6_IS_ADDR_MULTICAST(&mldh->mld_addr))
506                               break;
507 
508                     /*
509                      * If we belong to the group being reported, stop
510                      * our timer for that group.
511                      */
512                     rw_enter(&in6_multilock, RW_WRITER);
513                     in6m = in6_lookup_multi(&mld_addr, ifp);
514                     if (in6m) {
515                               in6m_ref(in6m);
516                               mld_stoptimer(in6m); /* transit to idle state */
517                               in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
518                               in6m_unref(in6m);
519                               in6m = NULL; /* in6m might be freed */
520                     }
521                     rw_exit(&in6_multilock);
522                     break;
523           default:            /* this is impossible */
524 #if 0
525                     /*
526                      * this case should be impossible because of filtering in
527                      * icmp6_input().  But we explicitly disabled this part
528                      * just in case.
529                      */
530                     log(LOG_ERR, "mld_input: illegal type(%d)", mldh->mld_type);
531 #endif
532                     break;
533           }
534 
535 out:
536           m_freem(m);
537 out_nodrop:
538           m_put_rcvif_psref(ifp, &psref);
539 }
540 
541 /*
542  * XXX mld_sendpkt must be called with in6_multilock held and
543  * will release in6_multilock before calling ip6_output and
544  * returning to avoid locking against myself in ip6_output.
545  */
546 static void
mld_sendpkt(struct in6_multi * in6m,int type,const struct in6_addr * dst)547 mld_sendpkt(struct in6_multi *in6m, int type, const struct in6_addr *dst)
548 {
549           struct mbuf *mh;
550           struct mld_hdr *mldh;
551           struct ip6_hdr *ip6 = NULL;
552           struct ip6_moptions im6o;
553           struct in6_ifaddr *ia = NULL;
554           struct ifnet *ifp = in6m->in6m_ifp;
555           int ignflags;
556           struct psref psref;
557           int bound;
558 
559           KASSERT(rw_write_held(&in6_multilock));
560 
561           /*
562            * At first, find a link local address on the outgoing interface
563            * to use as the source address of the MLD packet.
564            * We do not reject tentative addresses for MLD report to deal with
565            * the case where we first join a link-local address.
566            */
567           ignflags = (IN6_IFF_NOTREADY|IN6_IFF_ANYCAST) & ~IN6_IFF_TENTATIVE;
568           bound = curlwp_bind();
569           ia = in6ifa_ifpforlinklocal_psref(ifp, ignflags, &psref);
570           if (ia == NULL) {
571                     curlwp_bindx(bound);
572                     return;
573           }
574           if ((ia->ia6_flags & IN6_IFF_TENTATIVE)) {
575                     ia6_release(ia, &psref);
576                     ia = NULL;
577           }
578 
579           /* Allocate two mbufs to store IPv6 header and MLD header */
580           mldh = mld_allocbuf(&mh, in6m, type);
581           if (mldh == NULL) {
582                     ia6_release(ia, &psref);
583                     curlwp_bindx(bound);
584                     return;
585           }
586 
587           /* fill src/dst here */
588           ip6 = mtod(mh, struct ip6_hdr *);
589           ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
590           ip6->ip6_dst = dst ? *dst : in6m->in6m_addr;
591           ia6_release(ia, &psref);
592           curlwp_bindx(bound);
593 
594           mldh->mld_addr = in6m->in6m_addr;
595           in6_clearscope(&mldh->mld_addr); /* XXX */
596           mldh->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, sizeof(struct ip6_hdr),
597               sizeof(struct mld_hdr));
598 
599           /* construct multicast option */
600           memset(&im6o, 0, sizeof(im6o));
601           im6o.im6o_multicast_if_index = if_get_index(ifp);
602           im6o.im6o_multicast_hlim = 1;
603 
604           /*
605            * Request loopback of the report if we are acting as a multicast
606            * router, so that the process-level routing daemon can hear it.
607            */
608           im6o.im6o_multicast_loop = (ip6_mrouter != NULL);
609 
610           /* increment output statistics */
611           ICMP6_STATINC(ICMP6_STAT_OUTHIST + type);
612           icmp6_ifstat_inc(ifp, ifs6_out_msg);
613           switch (type) {
614           case MLD_LISTENER_QUERY:
615                     icmp6_ifstat_inc(ifp, ifs6_out_mldquery);
616                     break;
617           case MLD_LISTENER_REPORT:
618                     icmp6_ifstat_inc(ifp, ifs6_out_mldreport);
619                     break;
620           case MLD_LISTENER_DONE:
621                     icmp6_ifstat_inc(ifp, ifs6_out_mlddone);
622                     break;
623           }
624 
625           /* XXX we cannot call ip6_output with holding in6_multilock */
626           rw_exit(&in6_multilock);
627 
628           ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
629               &im6o, NULL, NULL);
630 
631           rw_enter(&in6_multilock, RW_WRITER);
632 }
633 
634 static struct mld_hdr *
mld_allocbuf(struct mbuf ** mh,struct in6_multi * in6m,int type)635 mld_allocbuf(struct mbuf **mh, struct in6_multi *in6m, int type)
636 {
637           struct mbuf *md;
638           struct mld_hdr *mldh;
639           struct ip6_hdr *ip6;
640 
641           /*
642            * Allocate mbufs to store ip6 header and MLD header.
643            * We allocate 2 mbufs and make chain in advance because
644            * it is more convenient when inserting the hop-by-hop option later.
645            */
646           MGETHDR(*mh, M_DONTWAIT, MT_HEADER);
647           if (*mh == NULL)
648                     return NULL;
649           MGET(md, M_DONTWAIT, MT_DATA);
650           if (md == NULL) {
651                     m_free(*mh);
652                     *mh = NULL;
653                     return NULL;
654           }
655           (*mh)->m_next = md;
656           md->m_next = NULL;
657 
658           m_reset_rcvif((*mh));
659           (*mh)->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
660           (*mh)->m_len = sizeof(struct ip6_hdr);
661           m_align(*mh, sizeof(struct ip6_hdr));
662 
663           /* fill in the ip6 header */
664           ip6 = mtod(*mh, struct ip6_hdr *);
665           memset(ip6, 0, sizeof(*ip6));
666           ip6->ip6_flow = 0;
667           ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
668           ip6->ip6_vfc |= IPV6_VERSION;
669           /* ip6_plen will be set later */
670           ip6->ip6_nxt = IPPROTO_ICMPV6;
671           /* ip6_hlim will be set by im6o.im6o_multicast_hlim */
672           /* ip6_src/dst will be set by mld_sendpkt() or mld_sendbuf() */
673 
674           /* fill in the MLD header as much as possible */
675           md->m_len = sizeof(struct mld_hdr);
676           mldh = mtod(md, struct mld_hdr *);
677           memset(mldh, 0, sizeof(struct mld_hdr));
678           mldh->mld_type = type;
679           return mldh;
680 }
681 
682 static void
in6m_ref(struct in6_multi * in6m)683 in6m_ref(struct in6_multi *in6m)
684 {
685 
686           KASSERT(rw_write_held(&in6_multilock));
687           in6m->in6m_refcount++;
688 }
689 
690 static void
in6m_unref(struct in6_multi * in6m)691 in6m_unref(struct in6_multi *in6m)
692 {
693 
694           KASSERT(rw_write_held(&in6_multilock));
695           if (--in6m->in6m_refcount == 0)
696                     in6m_destroy(in6m);
697 }
698 
699 /*
700  * Add an address to the list of IP6 multicast addresses for a given interface.
701  */
702 struct    in6_multi *
in6_addmulti(struct in6_addr * maddr6,struct ifnet * ifp,int * errorp,int timer)703 in6_addmulti(struct in6_addr *maddr6, struct ifnet *ifp, int *errorp,
704     int timer)
705 {
706           struct    sockaddr_in6 sin6;
707           struct    in6_multi *in6m;
708 
709           *errorp = 0;
710 
711           rw_enter(&in6_multilock, RW_WRITER);
712           /*
713            * See if address already in list.
714            */
715           in6m = in6_lookup_multi(maddr6, ifp);
716           if (in6m != NULL) {
717                     /*
718                      * Found it; just increment the reference count.
719                      */
720                     in6m->in6m_refcount++;
721           } else {
722                     /*
723                      * New address; allocate a new multicast record
724                      * and link it into the interface's multicast list.
725                      */
726                     in6m = malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
727                     if (in6m == NULL) {
728                               *errorp = ENOBUFS;
729                               goto out;
730                     }
731 
732                     in6m->in6m_addr = *maddr6;
733                     in6m->in6m_ifp = ifp;
734                     in6m->in6m_refcount = 1;
735                     in6m->in6m_timer = IN6M_TIMER_UNDEF;
736                     callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
737                     callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
738 
739                     LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
740 
741                     /*
742                      * Ask the network driver to update its multicast reception
743                      * filter appropriately for the new address.
744                      */
745                     sockaddr_in6_init(&sin6, maddr6, 0, 0, 0);
746                     *errorp = if_mcast_op(ifp, SIOCADDMULTI, sin6tosa(&sin6));
747                     if (*errorp) {
748                               callout_destroy(&in6m->in6m_timer_ch);
749                               LIST_REMOVE(in6m, in6m_entry);
750                               free(in6m, M_IPMADDR);
751                               in6m = NULL;
752                               goto out;
753                     }
754 
755                     in6m->in6m_timer = timer;
756                     if (in6m->in6m_timer > 0) {
757                               in6m->in6m_state = MLD_REPORTPENDING;
758                               mld_starttimer(in6m);
759                               goto out;
760                     }
761 
762                     /*
763                      * Let MLD6 know that we have joined a new IP6 multicast
764                      * group.
765                      */
766                     mld_start_listening(in6m);
767           }
768 out:
769           rw_exit(&in6_multilock);
770           return in6m;
771 }
772 
773 static void
in6m_destroy(struct in6_multi * in6m)774 in6m_destroy(struct in6_multi *in6m)
775 {
776           struct sockaddr_in6 sin6;
777 
778           KASSERT(rw_write_held(&in6_multilock));
779           KASSERTMSG(in6m->in6m_refcount == 0, "in6m_refcount=%d",
780               in6m->in6m_refcount);
781 
782           /*
783            * Unlink from list if it's listed.  This must be done before
784            * mld_stop_listening because it releases in6_multilock and that allows
785            * someone to look up the removing in6m from the list and add a
786            * reference to the entry unexpectedly.
787            */
788           if (in6_lookup_multi(&in6m->in6m_addr, in6m->in6m_ifp) != NULL)
789                     LIST_REMOVE(in6m, in6m_entry);
790 
791           /*
792            * No remaining claims to this record; let MLD6 know
793            * that we are leaving the multicast group.
794            */
795           mld_stop_listening(in6m);
796 
797           /*
798            * Delete all references of this multicasting group from
799            * the membership arrays
800            */
801           in6_purge_mcast_references(in6m);
802 
803           /*
804            * Notify the network driver to update its multicast
805            * reception filter.
806            */
807           sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
808           if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
809 
810           /* Tell mld_timeo we're halting the timer */
811           in6m->in6m_timer = IN6M_TIMER_UNDEF;
812 
813           rw_exit(&in6_multilock);
814           callout_halt(&in6m->in6m_timer_ch, NULL);
815           callout_destroy(&in6m->in6m_timer_ch);
816 
817           free(in6m, M_IPMADDR);
818           rw_enter(&in6_multilock, RW_WRITER);
819 }
820 
821 /*
822  * Delete a multicast address record.
823  */
824 void
in6_delmulti_locked(struct in6_multi * in6m)825 in6_delmulti_locked(struct in6_multi *in6m)
826 {
827 
828           KASSERT(rw_write_held(&in6_multilock));
829           KASSERTMSG(in6m->in6m_refcount > 0, "in6m_refcount=%d",
830               in6m->in6m_refcount);
831 
832           /*
833            * The caller should have a reference to in6m. So we don't need to care
834            * of releasing the lock in mld_stoptimer.
835            */
836           mld_stoptimer(in6m);
837           if (--in6m->in6m_refcount == 0)
838                     in6m_destroy(in6m);
839 }
840 
841 void
in6_delmulti(struct in6_multi * in6m)842 in6_delmulti(struct in6_multi *in6m)
843 {
844 
845           rw_enter(&in6_multilock, RW_WRITER);
846           in6_delmulti_locked(in6m);
847           rw_exit(&in6_multilock);
848 }
849 
850 /*
851  * Look up the in6_multi record for a given IP6 multicast address
852  * on a given interface. If no matching record is found, "in6m"
853  * returns NULL.
854  */
855 struct in6_multi *
in6_lookup_multi(const struct in6_addr * addr,const struct ifnet * ifp)856 in6_lookup_multi(const struct in6_addr *addr, const struct ifnet *ifp)
857 {
858           struct in6_multi *in6m;
859 
860           KASSERT(rw_lock_held(&in6_multilock));
861 
862           LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
863                     if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
864                               break;
865           }
866           return in6m;
867 }
868 
869 void
in6_lookup_and_delete_multi(const struct in6_addr * addr,const struct ifnet * ifp)870 in6_lookup_and_delete_multi(const struct in6_addr *addr,
871     const struct ifnet *ifp)
872 {
873           struct in6_multi *in6m;
874 
875           rw_enter(&in6_multilock, RW_WRITER);
876           in6m = in6_lookup_multi(addr, ifp);
877           if (in6m != NULL)
878                     in6_delmulti_locked(in6m);
879           rw_exit(&in6_multilock);
880 }
881 
882 bool
in6_multi_group(const struct in6_addr * addr,const struct ifnet * ifp)883 in6_multi_group(const struct in6_addr *addr, const struct ifnet *ifp)
884 {
885           bool ingroup;
886 
887           rw_enter(&in6_multilock, RW_READER);
888           ingroup = in6_lookup_multi(addr, ifp) != NULL;
889           rw_exit(&in6_multilock);
890 
891           return ingroup;
892 }
893 
894 /*
895  * Purge in6_multi records associated to the interface.
896  */
897 void
in6_purge_multi(struct ifnet * ifp)898 in6_purge_multi(struct ifnet *ifp)
899 {
900           struct in6_multi *in6m, *next;
901 
902           rw_enter(&in6_multilock, RW_WRITER);
903           LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
904                     LIST_REMOVE(in6m, in6m_entry);
905                     /*
906                      * Normally multicast addresses are already purged at this
907                      * point. Remaining references aren't accessible via ifp,
908                      * so what we can do here is to prevent ifp from being
909                      * accessed via in6m by removing it from the list of ifp.
910                      */
911                     mld_stoptimer(in6m);
912           }
913           rw_exit(&in6_multilock);
914 }
915 
916 void
in6_multi_lock(int op)917 in6_multi_lock(int op)
918 {
919 
920           rw_enter(&in6_multilock, op);
921 }
922 
923 void
in6_multi_unlock(void)924 in6_multi_unlock(void)
925 {
926 
927           rw_exit(&in6_multilock);
928 }
929 
930 bool
in6_multi_locked(int op)931 in6_multi_locked(int op)
932 {
933 
934           switch (op) {
935           case RW_READER:
936                     return rw_read_held(&in6_multilock);
937           case RW_WRITER:
938                     return rw_write_held(&in6_multilock);
939           default:
940                     return rw_lock_held(&in6_multilock);
941           }
942 }
943 
944 struct in6_multi_mship *
in6_joingroup(struct ifnet * ifp,struct in6_addr * addr,int * errorp,int timer)945 in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int timer)
946 {
947           struct in6_multi_mship *imm;
948 
949           imm = malloc(sizeof(*imm), M_IPMADDR, M_NOWAIT|M_ZERO);
950           if (imm == NULL) {
951                     *errorp = ENOBUFS;
952                     return NULL;
953           }
954 
955           imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, timer);
956           if (!imm->i6mm_maddr) {
957                     /* *errorp is already set */
958                     free(imm, M_IPMADDR);
959                     return NULL;
960           }
961           return imm;
962 }
963 
964 int
in6_leavegroup(struct in6_multi_mship * imm)965 in6_leavegroup(struct in6_multi_mship *imm)
966 {
967           struct in6_multi *in6m;
968 
969           rw_enter(&in6_multilock, RW_WRITER);
970           in6m = imm->i6mm_maddr;
971           imm->i6mm_maddr = NULL;
972           if (in6m != NULL) {
973                     in6_delmulti_locked(in6m);
974           }
975           rw_exit(&in6_multilock);
976           free(imm, M_IPMADDR);
977           return 0;
978 }
979 
980 /*
981  * DEPRECATED: keep it just to avoid breaking old sysctl users.
982  */
983 static int
in6_mkludge_sysctl(SYSCTLFN_ARGS)984 in6_mkludge_sysctl(SYSCTLFN_ARGS)
985 {
986 
987           if (namelen != 1)
988                     return EINVAL;
989           *oldlenp = 0;
990           return 0;
991 }
992 
993 static int
in6_multicast_sysctl(SYSCTLFN_ARGS)994 in6_multicast_sysctl(SYSCTLFN_ARGS)
995 {
996           struct ifnet *ifp;
997           struct ifaddr *ifa;
998           struct in6_ifaddr *ia6;
999           struct in6_multi *in6m;
1000           uint32_t tmp;
1001           int error;
1002           size_t written;
1003           struct psref psref, psref_ia;
1004           int bound, s;
1005 
1006           if (namelen != 1)
1007                     return EINVAL;
1008 
1009           rw_enter(&in6_multilock, RW_READER);
1010 
1011           bound = curlwp_bind();
1012           ifp = if_get_byindex(name[0], &psref);
1013           if (ifp == NULL) {
1014                     curlwp_bindx(bound);
1015                     rw_exit(&in6_multilock);
1016                     return ENODEV;
1017           }
1018 
1019           if (oldp == NULL) {
1020                     *oldlenp = 0;
1021                     s = pserialize_read_enter();
1022                     IFADDR_READER_FOREACH(ifa, ifp) {
1023                               LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1024                                         *oldlenp += 2 * sizeof(struct in6_addr) +
1025                                             sizeof(uint32_t);
1026                               }
1027                     }
1028                     pserialize_read_exit(s);
1029                     if_put(ifp, &psref);
1030                     curlwp_bindx(bound);
1031                     rw_exit(&in6_multilock);
1032                     return 0;
1033           }
1034 
1035           error = 0;
1036           written = 0;
1037           s = pserialize_read_enter();
1038           IFADDR_READER_FOREACH(ifa, ifp) {
1039                     if (ifa->ifa_addr->sa_family != AF_INET6)
1040                               continue;
1041 
1042                     ifa_acquire(ifa, &psref_ia);
1043                     pserialize_read_exit(s);
1044 
1045                     ia6 = ifatoia6(ifa);
1046                     LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
1047                               if (written + 2 * sizeof(struct in6_addr) +
1048                                   sizeof(uint32_t) > *oldlenp)
1049                                         goto done;
1050                               /*
1051                                * XXX return the first IPv6 address to keep backward
1052                                * compatibility, however now multicast addresses
1053                                * don't belong to any IPv6 addresses so it should be
1054                                * unnecessary.
1055                                */
1056                               error = sysctl_copyout(l, &ia6->ia_addr.sin6_addr,
1057                                   oldp, sizeof(struct in6_addr));
1058                               if (error)
1059                                         goto done;
1060                               oldp = (char *)oldp + sizeof(struct in6_addr);
1061                               written += sizeof(struct in6_addr);
1062                               error = sysctl_copyout(l, &in6m->in6m_addr,
1063                                   oldp, sizeof(struct in6_addr));
1064                               if (error)
1065                                         goto done;
1066                               oldp = (char *)oldp + sizeof(struct in6_addr);
1067                               written += sizeof(struct in6_addr);
1068                               tmp = in6m->in6m_refcount;
1069                               error = sysctl_copyout(l, &tmp, oldp, sizeof(tmp));
1070                               if (error)
1071                                         goto done;
1072                               oldp = (char *)oldp + sizeof(tmp);
1073                               written += sizeof(tmp);
1074                     }
1075 
1076                     s = pserialize_read_enter();
1077 
1078                     break;
1079           }
1080           pserialize_read_exit(s);
1081 done:
1082           ifa_release(ifa, &psref_ia);
1083           if_put(ifp, &psref);
1084           curlwp_bindx(bound);
1085           rw_exit(&in6_multilock);
1086           *oldlenp = written;
1087           return error;
1088 }
1089 
1090 void
in6_sysctl_multicast_setup(struct sysctllog ** clog)1091 in6_sysctl_multicast_setup(struct sysctllog **clog)
1092 {
1093 
1094           sysctl_createv(clog, 0, NULL, NULL,
1095                            CTLFLAG_PERMANENT,
1096                            CTLTYPE_NODE, "inet6", NULL,
1097                            NULL, 0, NULL, 0,
1098                            CTL_NET, PF_INET6, CTL_EOL);
1099 
1100           sysctl_createv(clog, 0, NULL, NULL,
1101                            CTLFLAG_PERMANENT,
1102                            CTLTYPE_NODE, "multicast",
1103                            SYSCTL_DESCR("Multicast information"),
1104                            in6_multicast_sysctl, 0, NULL, 0,
1105                            CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1106 
1107           sysctl_createv(clog, 0, NULL, NULL,
1108                            CTLFLAG_PERMANENT,
1109                            CTLTYPE_NODE, "multicast_kludge",
1110                            SYSCTL_DESCR("multicast kludge information"),
1111                            in6_mkludge_sysctl, 0, NULL, 0,
1112                            CTL_NET, PF_INET6, CTL_CREATE, CTL_EOL);
1113 }
1114