xref: /dragonfly/sys/netinet/ip_carp.c (revision 2b3f93ea6d1f70880f3e87f3c2cbe0dc0bfc9332)
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29 
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/caps.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50 
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82 
83 #include <netinet/ip_carp.h>
84 
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operation, e.g. ioctl, add/delete inet addresses
91  *   is serialized by netisr0; not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif, then switch if_carp to ncif:
148  *
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif, then switch if_carp to ncif:
213  *
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - Accessing if_carp:
246  *   The accessing code should cache if_carp in a local temporary
247  *   variable and use that temporary variable along the code path
248  *   instead of re-reading if_carp later on.
249  */
250 
/* Base name handed to the interface cloner and to if_initname(). */
#define	CARP_IFNAME	"carp"

/* Non-zero only when both IFF_UP and IFF_RUNNING are set on ifp. */
#define	CARP_IS_RUNNING(ifp)						\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) ==			\
	 (IFF_UP | IFF_RUNNING))
254 
255 struct carp_softc;
256 
257 struct carp_vhaddr {
258           uint32_t            vha_flags;          /* CARP_VHAF_ */
259           struct in_ifaddr    *vha_ia;  /* carp address */
260           struct in_ifaddr    *vha_iaback;        /* backing address */
261           TAILQ_ENTRY(carp_vhaddr) vha_link;
262 };
263 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264 
265 struct netmsg_carp {
266           struct netmsg_base  base;
267           struct ifnet                  *nc_carpdev;
268           struct carp_softc   *nc_softc;
269           void                          *nc_data;
270           size_t                        nc_datalen;
271 };
272 
273 struct carp_softc {
274           struct arpcom                  arpcom;
275           struct ifnet                  *sc_carpdev;        /* parent interface */
276           struct carp_vhaddr_list        sc_vha_list;       /* virtual addr list */
277 
278           const struct in_ifaddr        *sc_ia;             /* primary iface address v4 */
279           struct ip_moptions   sc_imo;
280 
281 #ifdef INET6
282           struct in6_ifaddr   *sc_ia6;  /* primary iface address v6 */
283           struct ip6_moptions            sc_im6o;
284 #endif /* INET6 */
285 
286           enum { INIT = 0, BACKUP, MASTER }
287                                          sc_state;
288           boolean_t            sc_dead;
289 
290           int                            sc_suppress;
291 
292           int                            sc_sendad_errors;
293 #define   CARP_SENDAD_MAX_ERRORS        3
294           int                            sc_sendad_success;
295 #define   CARP_SENDAD_MIN_SUCCESS 3
296 
297           int                            sc_vhid;
298           int                            sc_advskew;
299           int                            sc_naddrs;         /* actually used IPv4 vha */
300           int                            sc_naddrs6;
301           int                            sc_advbase;        /* seconds */
302           int                            sc_init_counter;
303           uint64_t             sc_counter;
304 
305           /* authentication */
306 #define CARP_HMAC_PAD         64
307           unsigned char                  sc_key[CARP_KEY_LEN];
308           unsigned char                  sc_pad[CARP_HMAC_PAD];
309           SHA1_CTX             sc_sha1;
310 
311           struct callout                 sc_ad_tmo;         /* advertisement timeout */
312           struct netmsg_carp   sc_ad_msg;         /* adv timeout netmsg */
313           struct callout                 sc_md_tmo;         /* ip4 master down timeout */
314           struct callout                 sc_md6_tmo;        /* ip6 master down timeout */
315           struct netmsg_carp   sc_md_msg;         /* master down timeout netmsg */
316 
317           LIST_ENTRY(carp_softc)         sc_next; /* Interface clue */
318 };
319 
320 #define sc_if       arpcom.ac_if
321 
322 struct carp_softc_container {
323           TAILQ_ENTRY(carp_softc_container) scc_link;
324           struct carp_softc   *scc_softc;
325 };
326 TAILQ_HEAD(carp_if, carp_softc_container);
327 
328 SYSCTL_DECL(_net_inet_carp);
329 
330 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0, 1 }; /* XXX for now */
331 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
332     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
333 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
334     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
335 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
336     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
337 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
338     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
339 SYSCTL_INT(_net_inet_carp, CARPCTL_SETROUTE, setroute, CTLFLAG_RW,
340     &carp_opts[CARPCTL_SETROUTE], 0, "set route");
341 
342 static int carp_suppress_preempt = 0;
343 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
344     &carp_suppress_preempt, 0, "Preemption is suppressed");
345 
346 static int carp_prio_ad = 1;
347 SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
348     &carp_prio_ad, 0, "Prioritize advertisement packet");
349 
350 static struct carpstats carpstats;
351 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
352     &carpstats, carpstats,
353     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
354 
355 #define   CARP_LOG(...)       do {                                    \
356           if (carp_opts[CARPCTL_LOG] > 0)                             \
357                     log(LOG_INFO, __VA_ARGS__);             \
358 } while (0)
359 
360 #define   CARP_DEBUG(...)     do {                                    \
361           if (carp_opts[CARPCTL_LOG] > 1)                             \
362                     log(LOG_DEBUG, __VA_ARGS__);            \
363 } while (0)
364 
365 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
366 
367 static void         carp_hmac_prepare(struct carp_softc *);
368 static void         carp_hmac_generate(struct carp_softc *, uint32_t *,
369                         unsigned char *);
370 static int          carp_hmac_verify(struct carp_softc *, uint32_t *,
371                         unsigned char *);
372 static void         carp_setroute(struct carp_softc *, int);
373 static void         carp_proto_input_c(struct carp_softc *, struct mbuf *,
374                         struct carp_header *, sa_family_t);
375 static int          carp_clone_create(struct if_clone *, int, caddr_t, caddr_t);
376 static int          carp_clone_destroy(struct ifnet *);
377 static void         carp_detach(struct carp_softc *, boolean_t, boolean_t);
378 static void         carp_prepare_ad(struct carp_softc *, struct carp_header *);
379 static void         carp_send_ad_all(void);
380 static void         carp_send_ad_timeout(void *);
381 static void         carp_send_ad(struct carp_softc *);
382 static void         carp_send_arp(struct carp_softc *);
383 static void         carp_master_down_timeout(void *);
384 static void         carp_master_down(struct carp_softc *);
385 static void         carp_setrun(struct carp_softc *, sa_family_t);
386 static void         carp_set_state(struct carp_softc *, int);
387 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
388 
389 static void         carp_init(void *);
390 static int          carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
391 static int          carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
392                         struct rtentry *);
393 static void         carp_start(struct ifnet *, struct ifaltq_subque *);
394 
395 static void         carp_multicast_cleanup(struct carp_softc *);
396 static void         carp_add_addr(struct carp_softc *, struct ifaddr *);
397 static void         carp_del_addr(struct carp_softc *, struct ifaddr *);
398 static void         carp_config_addr(struct carp_softc *, struct ifaddr *);
399 static void         carp_link_addrs(struct carp_softc *, struct ifnet *,
400                         struct ifaddr *);
401 static void         carp_unlink_addrs(struct carp_softc *, struct ifnet *,
402                         struct ifaddr *);
403 static void         carp_update_addrs(struct carp_softc *, struct ifaddr *);
404 
405 static int          carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
406                         struct in_ifaddr *);
407 static int          carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
408                         struct ifnet *, struct in_ifaddr *, int);
409 static void         carp_deactivate_vhaddr(struct carp_softc *,
410                         struct carp_vhaddr *, boolean_t);
411 static int          carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
412 static void         carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
413                         boolean_t);
414 
415 #ifdef foo
416 static void         carp_sc_state(struct carp_softc *);
417 #endif
418 #ifdef INET6
419 static void         carp_send_na(struct carp_softc *);
420 #ifdef notyet
421 static int          carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
422 static int          carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
423 #endif
424 static void         carp_multicast6_cleanup(struct carp_softc *);
425 #endif
426 static void         carp_stop(struct carp_softc *, boolean_t);
427 static void         carp_suspend(struct carp_softc *, boolean_t);
428 static void         carp_ioctl_stop(struct carp_softc *);
429 static int          carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
430 static void         carp_ioctl_ifcap(struct carp_softc *, int);
431 static int          carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
432 static int          carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
433 static int          carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
434 
435 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
436 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
437 static void         carp_if_free(struct carp_if *);
438 
439 static void         carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
440                                   struct ifaddr *);
441 static void         carp_ifdetach(void *, struct ifnet *);
442 
443 static void         carp_ifdetach_dispatch(netmsg_t);
444 static void         carp_clone_destroy_dispatch(netmsg_t);
445 static void         carp_init_dispatch(netmsg_t);
446 static void         carp_ioctl_stop_dispatch(netmsg_t);
447 static void         carp_ioctl_setvh_dispatch(netmsg_t);
448 static void         carp_ioctl_ifcap_dispatch(netmsg_t);
449 static void         carp_ioctl_getvh_dispatch(netmsg_t);
450 static void         carp_ioctl_getdevname_dispatch(netmsg_t);
451 static void         carp_ioctl_getvhaddr_dispatch(netmsg_t);
452 static void         carp_send_ad_timeout_dispatch(netmsg_t);
453 static void         carp_master_down_timeout_dispatch(netmsg_t);
454 
455 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
456 
457 static LIST_HEAD(, carp_softc) carpif_list;
458 
459 static struct if_clone carp_cloner =
460 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
461                          0, IF_MAXUNIT);
462 
463 static const uint8_t          carp_etheraddr[ETHER_ADDR_LEN] =
464           { 0, 0, 0x5e, 0, 1, 0 };
465 
466 static eventhandler_tag carp_ifdetach_event;
467 static eventhandler_tag carp_ifaddr_event;
468 
469 static __inline void
carp_insert_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha_new)470 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
471 {
472           struct carp_vhaddr *vha;
473           u_long new_addr, addr;
474 
475           KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
476 
477           /*
478            * Virtual address list is sorted; smaller one first
479            */
480           new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
481 
482           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
483                     addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
484 
485                     if (addr > new_addr)
486                               break;
487           }
488           if (vha == NULL)
489                     TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
490           else
491                     TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
492           vha_new->vha_flags |= CARP_VHAF_ONLIST;
493 }
494 
495 static __inline void
carp_remove_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha)496 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
497 {
498           KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
499           vha->vha_flags &= ~CARP_VHAF_ONLIST;
500           TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
501 }
502 
503 static void
carp_hmac_prepare(struct carp_softc * sc)504 carp_hmac_prepare(struct carp_softc *sc)
505 {
506           uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
507           uint8_t vhid = sc->sc_vhid & 0xff;
508           int i;
509 #ifdef INET6
510           struct ifaddr_container *ifac;
511           struct in6_addr in6;
512 #endif
513 #ifdef INET
514           struct carp_vhaddr *vha;
515 #endif
516 
517           /* XXX: possible race here */
518 
519           /* compute ipad from key */
520           bzero(sc->sc_pad, sizeof(sc->sc_pad));
521           bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
522           for (i = 0; i < sizeof(sc->sc_pad); i++)
523                     sc->sc_pad[i] ^= 0x36;
524 
525           /* precompute first part of inner hash */
526           SHA1Init(&sc->sc_sha1);
527           SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
528           SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
529           SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
530           SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
531 #ifdef INET
532           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
533                     SHA1Update(&sc->sc_sha1,
534                         (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
535                         sizeof(struct in_addr));
536           }
537 #endif /* INET */
538 #ifdef INET6
539           TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
540                     struct ifaddr *ifa = ifac->ifa;
541 
542                     if (ifa->ifa_addr->sa_family == AF_INET6) {
543                               in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
544                               in6_clearscope(&in6);
545                               SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
546                     }
547           }
548 #endif /* INET6 */
549 
550           /* convert ipad to opad */
551           for (i = 0; i < sizeof(sc->sc_pad); i++)
552                     sc->sc_pad[i] ^= 0x36 ^ 0x5c;
553 }
554 
555 static void
carp_hmac_generate(struct carp_softc * sc,uint32_t counter[2],unsigned char md[20])556 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
557     unsigned char md[20])
558 {
559           SHA1_CTX sha1ctx;
560 
561           /* fetch first half of inner hash */
562           bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
563 
564           SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
565           SHA1Final(md, &sha1ctx);
566 
567           /* outer hash */
568           SHA1Init(&sha1ctx);
569           SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
570           SHA1Update(&sha1ctx, md, 20);
571           SHA1Final(md, &sha1ctx);
572 }
573 
574 static int
carp_hmac_verify(struct carp_softc * sc,uint32_t counter[2],unsigned char md[20])575 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
576     unsigned char md[20])
577 {
578           unsigned char md2[20];
579 
580           carp_hmac_generate(sc, counter, md2);
581           return (bcmp(md, md2, sizeof(md2)));
582 }
583 
584 static void
carp_setroute(struct carp_softc * sc,int cmd)585 carp_setroute(struct carp_softc *sc, int cmd)
586 {
587 #ifdef INET6
588           struct ifaddr_container *ifac;
589 #endif
590           struct carp_vhaddr *vha;
591 
592           KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
593 
594           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
595                     if (vha->vha_iaback == NULL)
596                               continue;
597                     if (cmd == RTM_DELETE)
598                               carp_delroute_vhaddr(sc, vha, FALSE);
599                     else
600                               carp_addroute_vhaddr(sc, vha);
601           }
602 
603 #ifdef INET6
604           TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
605                     struct ifaddr *ifa = ifac->ifa;
606 
607                     if (ifa->ifa_addr->sa_family == AF_INET6) {
608                               if (cmd == RTM_ADD)
609                                         in6_ifaddloop(ifa);
610                               else
611                                         in6_ifremloop(ifa);
612                     }
613           }
614 #endif /* INET6 */
615 }
616 
617 static int
carp_clone_create(struct if_clone * ifc,int unit,caddr_t params __unused,caddr_t data __unused)618 carp_clone_create(struct if_clone *ifc, int unit,
619                       caddr_t params __unused, caddr_t data __unused)
620 {
621           struct carp_softc *sc;
622           struct ifnet *ifp;
623 
624           sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
625           ifp = &sc->sc_if;
626 
627           sc->sc_suppress = 0;
628           sc->sc_advbase = CARP_DFLTINTV;
629           sc->sc_vhid = -1;   /* required setting */
630           sc->sc_advskew = 0;
631           sc->sc_init_counter = 1;
632           sc->sc_naddrs = 0;
633           sc->sc_naddrs6 = 0;
634 
635           TAILQ_INIT(&sc->sc_vha_list);
636 
637 #ifdef INET6
638           sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
639 #endif
640 
641           callout_init_mp(&sc->sc_ad_tmo);
642           netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
643               MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
644           sc->sc_ad_msg.nc_softc = sc;
645 
646           callout_init_mp(&sc->sc_md_tmo);
647           callout_init_mp(&sc->sc_md6_tmo);
648           netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
649               MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
650           sc->sc_md_msg.nc_softc = sc;
651 
652           if_initname(ifp, CARP_IFNAME, unit);
653           ifp->if_softc = sc;
654           ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
655           ifp->if_init = carp_init;
656           ifp->if_ioctl = carp_ioctl;
657           ifp->if_start = carp_start;
658 
659           ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
660           ifp->if_capenable = ifp->if_capabilities;
661           /*
662            * Leave if_hwassist as it is; if_hwassist will be
663            * setup when this carp interface has parent.
664            */
665 
666           ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
667           ifq_set_ready(&ifp->if_snd);
668 
669           ether_ifattach(ifp, carp_etheraddr, NULL);
670 
671           ifp->if_type = IFT_CARP;
672           ifp->if_output = carp_output;
673 
674           lwkt_gettoken(&carp_listtok);
675           LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
676           lwkt_reltoken(&carp_listtok);
677 
678           return (0);
679 }
680 
681 static void
carp_clone_destroy_dispatch(netmsg_t msg)682 carp_clone_destroy_dispatch(netmsg_t msg)
683 {
684           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
685           struct carp_softc *sc = cmsg->nc_softc;
686 
687           sc->sc_dead = TRUE;
688           carp_detach(sc, TRUE, FALSE);
689 
690           callout_cancel(&sc->sc_ad_tmo);
691           callout_cancel(&sc->sc_md_tmo);
692           callout_cancel(&sc->sc_md6_tmo);
693 
694           crit_enter();
695           lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
696           lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
697           crit_exit();
698 
699           lwkt_replymsg(&cmsg->base.lmsg, 0);
700 }
701 
702 static int
carp_clone_destroy(struct ifnet * ifp)703 carp_clone_destroy(struct ifnet *ifp)
704 {
705           struct carp_softc *sc = ifp->if_softc;
706           struct netmsg_carp cmsg;
707 
708           bzero(&cmsg, sizeof(cmsg));
709           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
710               carp_clone_destroy_dispatch);
711           cmsg.nc_softc = sc;
712 
713           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
714 
715           lwkt_gettoken(&carp_listtok);
716           LIST_REMOVE(sc, sc_next);
717           lwkt_reltoken(&carp_listtok);
718 
719           bpfdetach(ifp);
720           if_detach(ifp);
721 
722           KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
723           kfree(sc, M_CARP);
724 
725           return 0;
726 }
727 
728 static struct carp_if *
carp_if_remove(struct carp_if * ocif,struct carp_softc * sc)729 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
730 {
731           struct carp_softc_container *oscc, *scc;
732           struct carp_if *cif;
733           int count = 0;
734 #ifdef INVARIANTS
735           int found = 0;
736 #endif
737 
738           TAILQ_FOREACH(oscc, ocif, scc_link) {
739                     ++count;
740 #ifdef INVARIANTS
741                     if (oscc->scc_softc == sc)
742                               found = 1;
743 #endif
744           }
745           KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
746 
747           if (count == 1) {
748                     /* Last one is going to be unlinked */
749                     return NULL;
750           }
751 
752           cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
753           TAILQ_INIT(cif);
754 
755           TAILQ_FOREACH(oscc, ocif, scc_link) {
756                     if (oscc->scc_softc == sc)
757                               continue;
758 
759                     scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
760                     scc->scc_softc = oscc->scc_softc;
761                     TAILQ_INSERT_TAIL(cif, scc, scc_link);
762           }
763 
764           return cif;
765 }
766 
767 static struct carp_if *
carp_if_insert(struct carp_if * ocif,struct carp_softc * sc)768 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
769 {
770           struct carp_softc_container *oscc;
771           int onlist;
772 
773           onlist = 0;
774           if (ocif != NULL) {
775                     TAILQ_FOREACH(oscc, ocif, scc_link) {
776                               if (oscc->scc_softc == sc)
777                                         onlist = 1;
778                     }
779           }
780 
781 #ifdef INVARIANTS
782           if (sc->sc_carpdev != NULL) {
783                     KASSERT(onlist, ("%s is not on %s carp list",
784                         sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
785           } else {
786                     KASSERT(!onlist, ("%s is already on carp list",
787                         sc->sc_if.if_xname));
788           }
789 #endif
790 
791           if (!onlist) {
792                     struct carp_if *cif;
793                     struct carp_softc_container *new_scc, *scc;
794                     int inserted = 0;
795 
796                     cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
797                     TAILQ_INIT(cif);
798 
799                     new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
800                     new_scc->scc_softc = sc;
801 
802                     if (ocif != NULL) {
803                               TAILQ_FOREACH(oscc, ocif, scc_link) {
804                                         if (!inserted &&
805                                             oscc->scc_softc->sc_vhid > sc->sc_vhid) {
806                                                   TAILQ_INSERT_TAIL(cif, new_scc,
807                                                       scc_link);
808                                                   inserted = 1;
809                                         }
810 
811                                         scc = kmalloc(sizeof(*scc), M_CARP,
812                                             M_WAITOK | M_ZERO);
813                                         scc->scc_softc = oscc->scc_softc;
814                                         TAILQ_INSERT_TAIL(cif, scc, scc_link);
815                               }
816                     }
817                     if (!inserted)
818                               TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
819 
820                     return cif;
821           } else {
822                     return ocif;
823           }
824 }
825 
826 static void
carp_if_free(struct carp_if * cif)827 carp_if_free(struct carp_if *cif)
828 {
829           struct carp_softc_container *scc;
830 
831           while ((scc = TAILQ_FIRST(cif)) != NULL) {
832                     TAILQ_REMOVE(cif, scc, scc_link);
833                     kfree(scc, M_CARP);
834           }
835           kfree(cif, M_CARP);
836 }
837 
838 static void
carp_detach(struct carp_softc * sc,boolean_t detach,boolean_t del_iaback)839 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
840 {
841           carp_suspend(sc, detach);
842 
843           carp_multicast_cleanup(sc);
844 #ifdef INET6
845           carp_multicast6_cleanup(sc);
846 #endif
847 
848           if (!sc->sc_dead && detach) {
849                     struct carp_vhaddr *vha;
850 
851                     TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
852                               carp_deactivate_vhaddr(sc, vha, del_iaback);
853                     KKASSERT(sc->sc_naddrs == 0);
854           }
855 
856           if (sc->sc_carpdev != NULL) {
857                     struct ifnet *ifp = sc->sc_carpdev;
858                     struct carp_if *ocif = ifp->if_carp;
859 
860                     ifp->if_carp = carp_if_remove(ocif, sc);
861                     KASSERT(ifp->if_carp != ocif,
862                         ("%s carp_if_remove failed", __func__));
863 
864                     sc->sc_carpdev = NULL;
865                     sc->sc_ia = NULL;
866                     sc->arpcom.ac_if.if_hwassist = 0;
867 
868                     /*
869                      * Make sure that all protocol threads see the
870                      * sc_carpdev and if_carp changes
871                      */
872                     netmsg_service_sync();
873 
874                     if (ifp->if_carp == NULL) {
875                               /*
876                                * No more carp interfaces using
877                                * ifp as the backing interface,
878                                * move it out of promiscous mode.
879                                */
880                               ifpromisc(ifp, 0);
881                     }
882 
883                     /*
884                      * The old carp list could be safely free now,
885                      * since no one can access it.
886                      */
887                     carp_if_free(ocif);
888           }
889 }
890 
891 static void
carp_ifdetach_dispatch(netmsg_t msg)892 carp_ifdetach_dispatch(netmsg_t msg)
893 {
894           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
895           struct ifnet *ifp = cmsg->nc_carpdev;
896 
897           while (ifp->if_carp) {
898                     struct carp_softc_container *scc;
899 
900                     scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
901                     carp_detach(scc->scc_softc, TRUE, TRUE);
902           }
903           lwkt_replymsg(&cmsg->base.lmsg, 0);
904 }
905 
906 /* Detach an interface from the carp. */
907 static void
carp_ifdetach(void * arg __unused,struct ifnet * ifp)908 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
909 {
910           struct netmsg_carp cmsg;
911 
912           ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
913 
914           bzero(&cmsg, sizeof(cmsg));
915           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
916               carp_ifdetach_dispatch);
917           cmsg.nc_carpdev = ifp;
918 
919           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
920 }
921 
922 /*
923  * process input packet.
924  * we have rearranged checks order compared to the rfc,
925  * but it seems more efficient this way or not possible otherwise.
926  */
927 int
carp_proto_input(struct mbuf ** mp,int * offp,int proto)928 carp_proto_input(struct mbuf **mp, int *offp, int proto)
929 {
930           struct mbuf *m = *mp;
931           struct ip *ip = mtod(m, struct ip *);
932           struct ifnet *ifp = m->m_pkthdr.rcvif;
933           struct carp_header *ch;
934           struct carp_softc *sc;
935           int len, iphlen;
936 
937           iphlen = *offp;
938           *mp = NULL;
939 
940           carpstats.carps_ipackets++;
941 
942           if (!carp_opts[CARPCTL_ALLOW]) {
943                     m_freem(m);
944                     goto back;
945           }
946 
947           /* Check if received on a valid carp interface */
948           if (ifp->if_type != IFT_CARP) {
949                     carpstats.carps_badif++;
950                     CARP_LOG("carp_proto_input: packet received on non-carp "
951                         "interface: %s\n", ifp->if_xname);
952                     m_freem(m);
953                     goto back;
954           }
955 
956           if (!CARP_IS_RUNNING(ifp)) {
957                     carpstats.carps_badif++;
958                     CARP_LOG("carp_proto_input: packet received on stopped carp "
959                         "interface: %s\n", ifp->if_xname);
960                     m_freem(m);
961                     goto back;
962           }
963 
964           sc = ifp->if_softc;
965           if (sc->sc_carpdev == NULL) {
966                     carpstats.carps_badif++;
967                     CARP_LOG("carp_proto_input: packet received on defunc carp "
968                         "interface: %s\n", ifp->if_xname);
969                     m_freem(m);
970                     goto back;
971           }
972 
973           if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
974                     carpstats.carps_badif++;
975                     CARP_LOG("carp_proto_input: non-mcast packet on "
976                         "interface: %s\n", ifp->if_xname);
977                     m_freem(m);
978                     goto back;
979           }
980 
981           /* Verify that the IP TTL is CARP_DFLTTL. */
982           if (ip->ip_ttl != CARP_DFLTTL) {
983                     carpstats.carps_badttl++;
984                     CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
985                         ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
986                     m_freem(m);
987                     goto back;
988           }
989 
990           /* Minimal CARP packet size */
991           len = iphlen + sizeof(*ch);
992 
993           /*
994            * Verify that the received packet length is
995            * not less than the CARP header
996            */
997           if (m->m_pkthdr.len < len) {
998                     carpstats.carps_badlen++;
999                     CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
1000                         ifp->if_xname);
1001                     m_freem(m);
1002                     goto back;
1003           }
1004 
1005           /* Make sure that CARP header is contiguous */
1006           if (len > m->m_len) {
1007                     m = m_pullup(m, len);
1008                     if (m == NULL) {
1009                               carpstats.carps_hdrops++;
1010                               CARP_LOG("carp_proto_input: m_pullup failed\n");
1011                               goto back;
1012                     }
1013                     ip = mtod(m, struct ip *);
1014           }
1015           ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1016 
1017           /* Verify the CARP checksum */
1018           if (in_cksum_skip(m, len, iphlen)) {
1019                     carpstats.carps_badsum++;
1020                     CARP_LOG("carp_proto_input: checksum failed on %s\n",
1021                         ifp->if_xname);
1022                     m_freem(m);
1023                     goto back;
1024           }
1025           carp_proto_input_c(sc, m, ch, AF_INET);
1026 back:
1027           return(IPPROTO_DONE);
1028 }
1029 
1030 #ifdef INET6
1031 int
carp6_proto_input(struct mbuf ** mp,int * offp,int proto)1032 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1033 {
1034           struct mbuf *m = *mp;
1035           struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1036           struct ifnet *ifp = m->m_pkthdr.rcvif;
1037           struct carp_header *ch;
1038           struct carp_softc *sc;
1039           u_int len;
1040 
1041           carpstats.carps_ipackets6++;
1042 
1043           if (!carp_opts[CARPCTL_ALLOW]) {
1044                     m_freem(m);
1045                     goto back;
1046           }
1047 
1048           /* check if received on a valid carp interface */
1049           if (ifp->if_type != IFT_CARP) {
1050                     carpstats.carps_badif++;
1051                     CARP_LOG("carp6_proto_input: packet received on non-carp "
1052                         "interface: %s\n", ifp->if_xname);
1053                     m_freem(m);
1054                     goto back;
1055           }
1056 
1057           if (!CARP_IS_RUNNING(ifp)) {
1058                     carpstats.carps_badif++;
1059                     CARP_LOG("carp_proto_input: packet received on stopped carp "
1060                         "interface: %s\n", ifp->if_xname);
1061                     m_freem(m);
1062                     goto back;
1063           }
1064 
1065           sc = ifp->if_softc;
1066           if (sc->sc_carpdev == NULL) {
1067                     carpstats.carps_badif++;
1068                     CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1069                         "interface: %s\n", ifp->if_xname);
1070                     m_freem(m);
1071                     goto back;
1072           }
1073 
1074           /* verify that the IP TTL is 255 */
1075           if (ip6->ip6_hlim != CARP_DFLTTL) {
1076                     carpstats.carps_badttl++;
1077                     CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1078                         ip6->ip6_hlim, ifp->if_xname);
1079                     m_freem(m);
1080                     goto back;
1081           }
1082 
1083           /* verify that we have a complete carp packet */
1084           len = m->m_len;
1085           IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1086           if (ch == NULL) {
1087                     carpstats.carps_badlen++;
1088                     CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1089                     goto back;
1090           }
1091 
1092           /* verify the CARP checksum */
1093           if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1094                     carpstats.carps_badsum++;
1095                     CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1096                         ifp->if_xname);
1097                     m_freem(m);
1098                     goto back;
1099           }
1100 
1101           carp_proto_input_c(sc, m, ch, AF_INET6);
1102 back:
1103           return (IPPROTO_DONE);
1104 }
1105 #endif /* INET6 */
1106 
1107 static void
carp_proto_input_c(struct carp_softc * sc,struct mbuf * m,struct carp_header * ch,sa_family_t af)1108 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1109     struct carp_header *ch, sa_family_t af)
1110 {
1111           struct ifnet *cifp;
1112           uint64_t tmp_counter;
1113           struct timeval sc_tv, ch_tv;
1114 
1115           if (sc->sc_vhid != ch->carp_vhid) {
1116                     /*
1117                      * CARP uses multicast, however, multicast packets
1118                      * are tapped to all CARP interfaces on the physical
1119                      * interface receiving the CARP packets, so we don't
1120                      * update any stats here.
1121                      */
1122                     m_freem(m);
1123                     return;
1124           }
1125           cifp = &sc->sc_if;
1126 
1127           /* verify the CARP version. */
1128           if (ch->carp_version != CARP_VERSION) {
1129                     carpstats.carps_badver++;
1130                     CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1131                                ch->carp_version);
1132                     m_freem(m);
1133                     return;
1134           }
1135 
1136           /* verify the hash */
1137           if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1138                     carpstats.carps_badauth++;
1139                     CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1140                     m_freem(m);
1141                     return;
1142           }
1143 
1144           tmp_counter = ntohl(ch->carp_counter[0]);
1145           tmp_counter = tmp_counter<<32;
1146           tmp_counter += ntohl(ch->carp_counter[1]);
1147 
1148           /* XXX Replay protection goes here */
1149 
1150           sc->sc_init_counter = 0;
1151           sc->sc_counter = tmp_counter;
1152 
1153           sc_tv.tv_sec = sc->sc_advbase;
1154           if (carp_suppress_preempt && sc->sc_advskew <  240)
1155                     sc_tv.tv_usec = 240 * 1000000 / 256;
1156           else
1157                     sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1158           ch_tv.tv_sec = ch->carp_advbase;
1159           ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1160 
1161           switch (sc->sc_state) {
1162           case INIT:
1163                     break;
1164 
1165           case MASTER:
1166                     /*
1167                      * If we receive an advertisement from a master who's going to
1168                      * be more frequent than us, go into BACKUP state.
1169                      */
1170                     if (timevalcmp(&sc_tv, &ch_tv, >) ||
1171                         timevalcmp(&sc_tv, &ch_tv, ==)) {
1172                               callout_stop(&sc->sc_ad_tmo);
1173                               CARP_DEBUG("%s: MASTER -> BACKUP "
1174                                  "(more frequent advertisement received)\n",
1175                                  cifp->if_xname);
1176                               carp_set_state(sc, BACKUP);
1177                               carp_setrun(sc, 0);
1178                               if (carp_opts[CARPCTL_SETROUTE])
1179                                         carp_setroute(sc, RTM_DELETE);
1180                     }
1181                     break;
1182 
1183           case BACKUP:
1184                     /*
1185                      * If we're pre-empting masters who advertise slower than us,
1186                      * and this one claims to be slower, treat him as down.
1187                      */
1188                     if (carp_opts[CARPCTL_PREEMPT] &&
1189                         timevalcmp(&sc_tv, &ch_tv, <)) {
1190                               CARP_DEBUG("%s: BACKUP -> MASTER "
1191                                   "(preempting a slower master)\n", cifp->if_xname);
1192                               carp_master_down(sc);
1193                               break;
1194                     }
1195 
1196                     /*
1197                      *  If the master is going to advertise at such a low frequency
1198                      *  that he's guaranteed to time out, we'd might as well just
1199                      *  treat him as timed out now.
1200                      */
1201                     sc_tv.tv_sec = sc->sc_advbase * 3;
1202                     if (timevalcmp(&sc_tv, &ch_tv, <)) {
1203                               CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1204                                            cifp->if_xname);
1205                               carp_master_down(sc);
1206                               break;
1207                     }
1208 
1209                     /*
1210                      * Otherwise, we reset the counter and wait for the next
1211                      * advertisement.
1212                      */
1213                     carp_setrun(sc, af);
1214                     break;
1215           }
1216           m_freem(m);
1217 }
1218 
1219 struct mbuf *
carp_input(void * v,struct mbuf * m)1220 carp_input(void *v, struct mbuf *m)
1221 {
1222           struct carp_if *cif = v;
1223           struct ether_header *eh;
1224           struct carp_softc_container *scc;
1225           struct ifnet *ifp;
1226 
1227           eh = mtod(m, struct ether_header *);
1228 
1229           ifp = carp_forus(cif, eh->ether_dhost);
1230           if (ifp != NULL) {
1231                     ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1232                     return NULL;
1233           }
1234 
1235           if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1236                     return m;
1237 
1238           /*
1239            * XXX Should really check the list of multicast addresses
1240            * for each CARP interface _before_ copying.
1241            */
1242           TAILQ_FOREACH(scc, cif, scc_link) {
1243                     struct carp_softc *sc = scc->scc_softc;
1244                     struct mbuf *m0;
1245 
1246                     if ((sc->sc_if.if_flags & IFF_UP) == 0)
1247                               continue;
1248 
1249                     m0 = m_dup(m, M_NOWAIT);
1250                     if (m0 == NULL)
1251                               continue;
1252 
1253                     ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1254           }
1255           return m;
1256 }
1257 
1258 static void
carp_prepare_ad(struct carp_softc * sc,struct carp_header * ch)1259 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1260 {
1261           if (sc->sc_init_counter) {
1262                     /* this could also be seconds since unix epoch */
1263                     sc->sc_counter = karc4random();
1264                     sc->sc_counter = sc->sc_counter << 32;
1265                     sc->sc_counter += karc4random();
1266           } else {
1267                     sc->sc_counter++;
1268           }
1269 
1270           ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1271           ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1272 
1273           carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1274 }
1275 
1276 static void
carp_send_ad_all(void)1277 carp_send_ad_all(void)
1278 {
1279           struct carp_softc *sc;
1280 
1281           LIST_FOREACH(sc, &carpif_list, sc_next) {
1282                     if (sc->sc_carpdev == NULL)
1283                               continue;
1284 
1285                     if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1286                               carp_send_ad(sc);
1287           }
1288 }
1289 
1290 static void
carp_send_ad_timeout(void * xsc)1291 carp_send_ad_timeout(void *xsc)
1292 {
1293           struct carp_softc *sc = xsc;
1294           struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1295 
1296           KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1297               __func__, mycpuid));
1298 
1299           crit_enter();
1300           if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1301                     lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1302           crit_exit();
1303 }
1304 
1305 static void
carp_send_ad_timeout_dispatch(netmsg_t msg)1306 carp_send_ad_timeout_dispatch(netmsg_t msg)
1307 {
1308           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1309           struct carp_softc *sc = cmsg->nc_softc;
1310 
1311           /* Reply ASAP */
1312           crit_enter();
1313           lwkt_replymsg(&cmsg->base.lmsg, 0);
1314           crit_exit();
1315 
1316           carp_send_ad(sc);
1317 }
1318 
1319 static void
carp_send_ad(struct carp_softc * sc)1320 carp_send_ad(struct carp_softc *sc)
1321 {
1322           struct ifnet *cifp = &sc->sc_if;
1323           struct carp_header ch;
1324           struct timeval tv;
1325           struct carp_header *ch_ptr;
1326           struct mbuf *m;
1327           int len, advbase, advskew;
1328 
1329           if (!CARP_IS_RUNNING(cifp)) {
1330                     /* Bow out */
1331                     advbase = 255;
1332                     advskew = 255;
1333           } else {
1334                     advbase = sc->sc_advbase;
1335                     if (!carp_suppress_preempt || sc->sc_advskew > 240)
1336                               advskew = sc->sc_advskew;
1337                     else
1338                               advskew = 240;
1339                     tv.tv_sec = advbase;
1340                     tv.tv_usec = advskew * 1000000 / 256;
1341           }
1342 
1343           ch.carp_version = CARP_VERSION;
1344           ch.carp_type = CARP_ADVERTISEMENT;
1345           ch.carp_vhid = sc->sc_vhid;
1346           ch.carp_advbase = advbase;
1347           ch.carp_advskew = advskew;
1348           ch.carp_authlen = 7;          /* XXX DEFINE */
1349           ch.carp_pad1 = 0;   /* must be zero */
1350           ch.carp_cksum = 0;
1351 
1352 #ifdef INET
1353           if (sc->sc_ia != NULL) {
1354                     struct ip *ip;
1355 
1356                     MGETHDR(m, M_NOWAIT, MT_HEADER);
1357                     if (m == NULL) {
1358                               IFNET_STAT_INC(cifp, oerrors, 1);
1359                               carpstats.carps_onomem++;
1360                               /* XXX maybe less ? */
1361                               if (advbase != 255 || advskew != 255)
1362                                         callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1363                                             carp_send_ad_timeout, sc);
1364                               return;
1365                     }
1366                     len = sizeof(*ip) + sizeof(ch);
1367                     m->m_pkthdr.len = len;
1368                     m->m_pkthdr.rcvif = NULL;
1369                     m->m_len = len;
1370                     MH_ALIGN(m, m->m_len);
1371                     m->m_flags |= M_MCAST;
1372                     if (carp_prio_ad)
1373                               m->m_flags |= M_PRIO;
1374                     ip = mtod(m, struct ip *);
1375                     ip->ip_v = IPVERSION;
1376                     ip->ip_hl = sizeof(*ip) >> 2;
1377                     ip->ip_tos = IPTOS_LOWDELAY;
1378                     ip->ip_len = htons(len);
1379                     ip->ip_id = ip_newid();
1380                     ip->ip_off = htons(IP_DF);
1381                     ip->ip_ttl = CARP_DFLTTL;
1382                     ip->ip_p = IPPROTO_CARP;
1383                     ip->ip_sum = 0;
1384                     ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1385                     ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1386 
1387                     ch_ptr = (struct carp_header *)(&ip[1]);
1388                     bcopy(&ch, ch_ptr, sizeof(ch));
1389                     carp_prepare_ad(sc, ch_ptr);
1390                     ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1391 
1392                     getmicrotime(&cifp->if_lastchange);
1393                     IFNET_STAT_INC(cifp, opackets, 1);
1394                     IFNET_STAT_INC(cifp, obytes, len);
1395                     carpstats.carps_opackets++;
1396 
1397                     if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1398                               IFNET_STAT_INC(cifp, oerrors, 1);
1399                               if (sc->sc_sendad_errors < INT_MAX)
1400                                         sc->sc_sendad_errors++;
1401                               if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1402                                         carp_suppress_preempt++;
1403                                         if (carp_suppress_preempt == 1) {
1404                                                   carp_send_ad_all();
1405                                         }
1406                               }
1407                               sc->sc_sendad_success = 0;
1408                     } else {
1409                               if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1410                                         if (++sc->sc_sendad_success >=
1411                                             CARP_SENDAD_MIN_SUCCESS) {
1412                                                   carp_suppress_preempt--;
1413                                                   sc->sc_sendad_errors = 0;
1414                                         }
1415                               } else {
1416                                         sc->sc_sendad_errors = 0;
1417                               }
1418                     }
1419           }
1420 #endif /* INET */
1421 #ifdef INET6
1422           if (sc->sc_ia6) {
1423                     struct ip6_hdr *ip6;
1424 
1425                     MGETHDR(m, M_NOWAIT, MT_HEADER);
1426                     if (m == NULL) {
1427                               IFNET_STAT_INC(cifp, oerrors, 1);
1428                               carpstats.carps_onomem++;
1429                               /* XXX maybe less ? */
1430                               if (advbase != 255 || advskew != 255)
1431                                         callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1432                                             carp_send_ad_timeout, sc);
1433                               return;
1434                     }
1435                     len = sizeof(*ip6) + sizeof(ch);
1436                     m->m_pkthdr.len = len;
1437                     m->m_pkthdr.rcvif = NULL;
1438                     m->m_len = len;
1439                     MH_ALIGN(m, m->m_len);
1440                     m->m_flags |= M_MCAST;
1441                     ip6 = mtod(m, struct ip6_hdr *);
1442                     bzero(ip6, sizeof(*ip6));
1443                     ip6->ip6_vfc |= IPV6_VERSION;
1444                     ip6->ip6_hlim = CARP_DFLTTL;
1445                     ip6->ip6_nxt = IPPROTO_CARP;
1446                     bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1447                         sizeof(struct in6_addr));
1448                     /* set the multicast destination */
1449 
1450                     ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1451                     ip6->ip6_dst.s6_addr8[15] = 0x12;
1452                     if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1453                               IFNET_STAT_INC(cifp, oerrors, 1);
1454                               m_freem(m);
1455                               CARP_LOG("%s: in6_setscope failed\n", __func__);
1456                               return;
1457                     }
1458 
1459                     ch_ptr = (struct carp_header *)(&ip6[1]);
1460                     bcopy(&ch, ch_ptr, sizeof(ch));
1461                     carp_prepare_ad(sc, ch_ptr);
1462                     ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1463 
1464                     getmicrotime(&cifp->if_lastchange);
1465                     IFNET_STAT_INC(cifp, opackets, 1);
1466                     IFNET_STAT_INC(cifp, obytes, len);
1467                     carpstats.carps_opackets6++;
1468 
1469                     if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1470                               IFNET_STAT_INC(cifp, oerrors, 1);
1471                               if (sc->sc_sendad_errors < INT_MAX)
1472                                         sc->sc_sendad_errors++;
1473                               if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1474                                         carp_suppress_preempt++;
1475                                         if (carp_suppress_preempt == 1) {
1476                                                   carp_send_ad_all();
1477                                         }
1478                               }
1479                               sc->sc_sendad_success = 0;
1480                     } else {
1481                               if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1482                                         if (++sc->sc_sendad_success >=
1483                                             CARP_SENDAD_MIN_SUCCESS) {
1484                                                   carp_suppress_preempt--;
1485                                                   sc->sc_sendad_errors = 0;
1486                                         }
1487                               } else {
1488                                         sc->sc_sendad_errors = 0;
1489                               }
1490                     }
1491           }
1492 #endif /* INET6 */
1493 
1494           if (advbase != 255 || advskew != 255)
1495                     callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1496                         carp_send_ad_timeout, sc);
1497 }
1498 
1499 /*
1500  * Broadcast a gratuitous ARP request containing
1501  * the virtual router MAC address for each IP address
1502  * associated with the virtual router.
1503  */
1504 static void
carp_send_arp(struct carp_softc * sc)1505 carp_send_arp(struct carp_softc *sc)
1506 {
1507           const struct carp_vhaddr *vha;
1508 
1509           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1510                     if (vha->vha_iaback == NULL)
1511                               continue;
1512                     arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1513           }
1514 }
1515 
1516 #ifdef INET6
1517 static void
carp_send_na(struct carp_softc * sc)1518 carp_send_na(struct carp_softc *sc)
1519 {
1520           struct ifaddr_container *ifac;
1521           struct in6_addr *in6;
1522           static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1523 
1524           TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1525                     struct ifaddr *ifa = ifac->ifa;
1526 
1527                     if (ifa->ifa_addr->sa_family != AF_INET6)
1528                               continue;
1529 
1530                     in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1531                     nd6_na_output(sc->sc_carpdev, &mcast, in6,
1532                         ND_NA_FLAG_OVERRIDE, 1, NULL);
1533                     DELAY(1000);        /* XXX */
1534           }
1535 }
1536 #endif /* INET6 */
1537 
1538 #ifdef notyet
1539 static __inline const struct carp_vhaddr *
carp_find_addr(const struct carp_softc * sc,const struct in_addr * addr)1540 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1541 {
1542           struct carp_vhaddr *vha;
1543 
1544           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1545                     if (vha->vha_iaback == NULL)
1546                               continue;
1547 
1548                     if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1549                               return vha;
1550           }
1551           return NULL;
1552 }
1553 
1554 static int
carp_iamatch_balance(const struct carp_if * cif,const struct in_addr * itaddr,const struct in_addr * isaddr,uint8_t ** enaddr)1555 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1556                          const struct in_addr *isaddr, uint8_t **enaddr)
1557 {
1558           const struct carp_softc *vh;
1559           int index, count = 0;
1560 
1561           /*
1562            * XXX proof of concept implementation.
1563            * We use the source ip to decide which virtual host should
1564            * handle the request. If we're master of that virtual host,
1565            * then we respond, otherwise, just drop the arp packet on
1566            * the floor.
1567            */
1568 
1569           TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1570                     if (!CARP_IS_RUNNING(&vh->sc_if))
1571                               continue;
1572 
1573                     if (carp_find_addr(vh, itaddr) != NULL)
1574                               count++;
1575           }
1576           if (count == 0)
1577                     return 0;
1578 
1579           /* this should be a hash, like pf_hash() */
1580           index = ntohl(isaddr->s_addr) % count;
1581           count = 0;
1582 
1583           TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1584                     if (!CARP_IS_RUNNING(&vh->sc_if))
1585                               continue;
1586 
1587                     if (carp_find_addr(vh, itaddr) == NULL)
1588                               continue;
1589 
1590                     if (count == index) {
1591                               if (vh->sc_state == MASTER) {
1592                                         *enaddr = IF_LLADDR(&vh->sc_if);
1593                                         return 1;
1594                               } else {
1595                                         return 0;
1596                               }
1597                     }
1598                     count++;
1599           }
1600           return 0;
1601 }
1602 #endif
1603 
1604 int
carp_iamatch(const struct in_ifaddr * ia)1605 carp_iamatch(const struct in_ifaddr *ia)
1606 {
1607           const struct carp_softc *sc = ia->ia_ifp->if_softc;
1608 
1609           ASSERT_NETISR0;
1610 
1611 #ifdef notyet
1612           if (carp_opts[CARPCTL_ARPBALANCE])
1613                     return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1614 #endif
1615 
1616           if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1617                     return 0;
1618 
1619           return 1;
1620 }
1621 
1622 #ifdef INET6
1623 struct ifaddr *
carp_iamatch6(void * v,struct in6_addr * taddr)1624 carp_iamatch6(void *v, struct in6_addr *taddr)
1625 {
1626 #ifdef foo
1627           struct carp_if *cif = v;
1628           struct carp_softc *vh;
1629 
1630           TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1631                     struct ifaddr_container *ifac;
1632 
1633                     TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1634                                     ifa_link) {
1635                               struct ifaddr *ifa = ifac->ifa;
1636 
1637                               if (IN6_ARE_ADDR_EQUAL(taddr,
1638                                   &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1639                                   CARP_IS_RUNNING(&vh->sc_if) &&
1640                                   vh->sc_state == MASTER) {
1641                                         return (ifa);
1642                               }
1643                     }
1644           }
1645 #endif
1646           return (NULL);
1647 }
1648 
1649 void *
carp_macmatch6(void * v,struct mbuf * m,const struct in6_addr * taddr)1650 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1651 {
1652 #ifdef foo
1653           struct m_tag *mtag;
1654           struct carp_if *cif = v;
1655           struct carp_softc *sc;
1656 
1657           TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1658                     struct ifaddr_container *ifac;
1659 
1660                     TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1661                                     ifa_link) {
1662                               struct ifaddr *ifa = ifac->ifa;
1663 
1664                               if (IN6_ARE_ADDR_EQUAL(taddr,
1665                                   &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1666                                   CARP_IS_RUNNING(&sc->sc_if)) {
1667                                         struct ifnet *ifp = &sc->sc_if;
1668 
1669                                         mtag = m_tag_get(PACKET_TAG_CARP,
1670                                             sizeof(struct ifnet *), M_NOWAIT);
1671                                         if (mtag == NULL) {
1672                                                   /* better a bit than nothing */
1673                                                   return (IF_LLADDR(ifp));
1674                                         }
1675                                         bcopy(&ifp, (caddr_t)(mtag + 1),
1676                                             sizeof(struct ifnet *));
1677                                         m_tag_prepend(m, mtag);
1678 
1679                                         return (IF_LLADDR(ifp));
1680                               }
1681                     }
1682           }
1683 #endif
1684           return (NULL);
1685 }
1686 #endif
1687 
1688 static struct ifnet *
carp_forus(struct carp_if * cif,const uint8_t * dhost)1689 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1690 {
1691           struct carp_softc_container *scc;
1692 
1693           if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1694                     return NULL;
1695 
1696           TAILQ_FOREACH(scc, cif, scc_link) {
1697                     struct carp_softc *sc = scc->scc_softc;
1698                     struct ifnet *ifp = &sc->sc_if;
1699 
1700                     if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1701                         !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1702                               return ifp;
1703           }
1704           return NULL;
1705 }
1706 
1707 static void
carp_master_down_timeout(void * xsc)1708 carp_master_down_timeout(void *xsc)
1709 {
1710           struct carp_softc *sc = xsc;
1711           struct netmsg_carp *cmsg = &sc->sc_md_msg;
1712 
1713           KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1714               __func__, mycpuid));
1715 
1716           crit_enter();
1717           if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1718                     lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1719           crit_exit();
1720 }
1721 
1722 static void
carp_master_down_timeout_dispatch(netmsg_t msg)1723 carp_master_down_timeout_dispatch(netmsg_t msg)
1724 {
1725           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1726           struct carp_softc *sc = cmsg->nc_softc;
1727 
1728           /* Reply ASAP */
1729           crit_enter();
1730           lwkt_replymsg(&cmsg->base.lmsg, 0);
1731           crit_exit();
1732 
1733           CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1734                        sc->sc_if.if_xname);
1735           carp_master_down(sc);
1736 }
1737 
1738 static void
carp_master_down(struct carp_softc * sc)1739 carp_master_down(struct carp_softc *sc)
1740 {
1741           switch (sc->sc_state) {
1742           case INIT:
1743                     kprintf("%s: master_down event in INIT state\n",
1744                               sc->sc_if.if_xname);
1745                     break;
1746 
1747           case MASTER:
1748                     break;
1749 
1750           case BACKUP:
1751                     carp_set_state(sc, MASTER);
1752                     carp_send_ad(sc);
1753                     carp_send_arp(sc);
1754 #ifdef INET6
1755                     carp_send_na(sc);
1756 #endif /* INET6 */
1757                     carp_setrun(sc, 0);
1758                     if (carp_opts[CARPCTL_SETROUTE])
1759                               carp_setroute(sc, RTM_ADD);
1760                     break;
1761           }
1762 }
1763 
1764 /*
1765  * When in backup state, af indicates whether to reset the master down timer
1766  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1767  */
1768 static void
carp_setrun(struct carp_softc * sc,sa_family_t af)1769 carp_setrun(struct carp_softc *sc, sa_family_t af)
1770 {
1771           struct ifnet *cifp = &sc->sc_if;
1772           struct timeval tv;
1773 
1774           if (sc->sc_carpdev == NULL) {
1775                     carp_set_state(sc, INIT);
1776                     return;
1777           }
1778 
1779           if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1780               (sc->sc_naddrs || sc->sc_naddrs6)) {
1781                     /* Nothing */
1782           } else {
1783                     if (carp_opts[CARPCTL_SETROUTE])
1784                               carp_setroute(sc, RTM_DELETE);
1785                     return;
1786           }
1787 
1788           switch (sc->sc_state) {
1789           case INIT:
1790                     if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1791                               carp_send_ad(sc);
1792                               carp_send_arp(sc);
1793 #ifdef INET6
1794                               carp_send_na(sc);
1795 #endif /* INET6 */
1796                               CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1797                                            cifp->if_xname);
1798                               carp_set_state(sc, MASTER);
1799                               if (carp_opts[CARPCTL_SETROUTE])
1800                                         carp_setroute(sc, RTM_ADD);
1801                     } else {
1802                               CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1803                               carp_set_state(sc, BACKUP);
1804                               if (carp_opts[CARPCTL_SETROUTE])
1805                                         carp_setroute(sc, RTM_DELETE);
1806                               carp_setrun(sc, 0);
1807                     }
1808                     break;
1809 
1810           case BACKUP:
1811                     callout_stop(&sc->sc_ad_tmo);
1812                     tv.tv_sec = 3 * sc->sc_advbase;
1813                     tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1814                     switch (af) {
1815 #ifdef INET
1816                     case AF_INET:
1817                               callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1818                                   carp_master_down_timeout, sc);
1819                               break;
1820 #endif /* INET */
1821 #ifdef INET6
1822                     case AF_INET6:
1823                               callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1824                                   carp_master_down_timeout, sc);
1825                               break;
1826 #endif /* INET6 */
1827                     default:
1828                               if (sc->sc_naddrs)
1829                                         callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1830                                             carp_master_down_timeout, sc);
1831                               if (sc->sc_naddrs6)
1832                                         callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1833                                             carp_master_down_timeout, sc);
1834                               break;
1835                     }
1836                     break;
1837 
1838           case MASTER:
1839                     tv.tv_sec = sc->sc_advbase;
1840                     tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1841                     callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1842                         carp_send_ad_timeout, sc);
1843                     break;
1844           }
1845 }
1846 
1847 static void
carp_multicast_cleanup(struct carp_softc * sc)1848 carp_multicast_cleanup(struct carp_softc *sc)
1849 {
1850           struct ip_moptions *imo = &sc->sc_imo;
1851 
1852           if (imo->imo_num_memberships == 0)
1853                     return;
1854           KKASSERT(imo->imo_num_memberships == 1);
1855 
1856           in_delmulti(imo->imo_membership[0]);
1857           imo->imo_membership[0] = NULL;
1858           imo->imo_num_memberships = 0;
1859           imo->imo_multicast_ifp = NULL;
1860 }
1861 
1862 #ifdef INET6
1863 static void
carp_multicast6_cleanup(struct carp_softc * sc)1864 carp_multicast6_cleanup(struct carp_softc *sc)
1865 {
1866           struct ip6_moptions *im6o = &sc->sc_im6o;
1867 
1868           while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1869                     struct in6_multi_mship *imm =
1870                         LIST_FIRST(&im6o->im6o_memberships);
1871 
1872                     LIST_REMOVE(imm, i6mm_chain);
1873                     in6_leavegroup(imm);
1874           }
1875           im6o->im6o_multicast_ifp = NULL;
1876 }
1877 #endif
1878 
1879 static void
carp_ioctl_getvhaddr_dispatch(netmsg_t msg)1880 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1881 {
1882           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1883           struct carp_softc *sc = cmsg->nc_softc;
1884           const struct carp_vhaddr *vha;
1885           struct ifcarpvhaddr *carpa, *carpa0;
1886           int count, len, error = 0;
1887 
1888           count = 0;
1889           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1890                     ++count;
1891 
1892           if (cmsg->nc_datalen == 0) {
1893                     cmsg->nc_datalen = count * sizeof(*carpa);
1894                     goto back;
1895           } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1896                     cmsg->nc_datalen = 0;
1897                     goto back;
1898           }
1899           len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1900           KKASSERT(len >= sizeof(*carpa));
1901 
1902           carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1903           if (carpa == NULL) {
1904                     error = ENOMEM;
1905                     goto back;
1906           }
1907 
1908           count = 0;
1909           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1910                     if (len < sizeof(*carpa))
1911                               break;
1912 
1913                     carpa->carpa_flags = vha->vha_flags;
1914                     carpa->carpa_addr.sin_family = AF_INET;
1915                     carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1916 
1917                     carpa->carpa_baddr.sin_family = AF_INET;
1918                     if (vha->vha_iaback == NULL) {
1919                               carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1920                     } else {
1921                               carpa->carpa_baddr.sin_addr =
1922                               vha->vha_iaback->ia_addr.sin_addr;
1923                     }
1924 
1925                     ++carpa;
1926                     ++count;
1927                     len -= sizeof(*carpa);
1928           }
1929           cmsg->nc_datalen = sizeof(*carpa) * count;
1930           KKASSERT(cmsg->nc_datalen > 0);
1931 
1932           cmsg->nc_data = carpa0;
1933 
1934 back:
1935           lwkt_replymsg(&cmsg->base.lmsg, error);
1936 }
1937 
1938 static int
carp_ioctl_getvhaddr(struct carp_softc * sc,struct ifdrv * ifd)1939 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1940 {
1941           struct ifnet *ifp = &sc->arpcom.ac_if;
1942           struct netmsg_carp cmsg;
1943           int error;
1944 
1945           ASSERT_IFNET_SERIALIZED_ALL(ifp);
1946           ifnet_deserialize_all(ifp);
1947 
1948           bzero(&cmsg, sizeof(cmsg));
1949           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1950               carp_ioctl_getvhaddr_dispatch);
1951           cmsg.nc_softc = sc;
1952           cmsg.nc_datalen = ifd->ifd_len;
1953 
1954           error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1955 
1956           if (!error) {
1957                     if (cmsg.nc_data != NULL) {
1958                               error = copyout(cmsg.nc_data, ifd->ifd_data,
1959                                   cmsg.nc_datalen);
1960                               kfree(cmsg.nc_data, M_TEMP);
1961                     }
1962                     ifd->ifd_len = cmsg.nc_datalen;
1963           } else {
1964                     KASSERT(cmsg.nc_data == NULL,
1965                         ("%s temp vhaddr is alloc upon error", __func__));
1966           }
1967 
1968           ifnet_serialize_all(ifp);
1969           return error;
1970 }
1971 
1972 static int
carp_config_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,struct in_ifaddr * ia_del)1973 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1974     struct in_ifaddr *ia_del)
1975 {
1976           struct ifnet *ifp;
1977           struct in_ifaddr *ia_if;
1978           const struct in_ifaddr *ia_vha;
1979           struct in_ifaddr_container *iac;
1980           int own, ia_match_carpdev;
1981 
1982           KKASSERT(vha->vha_ia != NULL);
1983           ia_vha = vha->vha_ia;
1984 
1985           ia_if = NULL;
1986           own = 0;
1987           ia_match_carpdev = 0;
1988           TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1989                     struct in_ifaddr *ia = iac->ia;
1990 
1991                     if (ia == ia_del)
1992                               continue;
1993 
1994                     if (ia->ia_ifp->if_type == IFT_CARP)
1995                               continue;
1996 
1997                     if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1998                               continue;
1999 
2000                     /* and, yeah, we need a multicast-capable iface too */
2001                     if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
2002                               continue;
2003 
2004                     if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
2005                         ia_vha->ia_subnet == ia->ia_subnet) {
2006                               if (ia_vha->ia_addr.sin_addr.s_addr ==
2007                                   ia->ia_addr.sin_addr.s_addr)
2008                                         own = 1;
2009                               if (ia_if == NULL) {
2010                                         ia_if = ia;
2011                               } else if (sc->sc_carpdev != NULL &&
2012                                   sc->sc_carpdev == ia->ia_ifp) {
2013                                         ia_if = ia;
2014                                         if (ia_if->ia_flags & IFA_ROUTE) {
2015                                                   /*
2016                                                    * Address with prefix route
2017                                                    * is prefered
2018                                                    */
2019                                                   break;
2020                                         }
2021                                         ia_match_carpdev = 1;
2022                               } else if (!ia_match_carpdev) {
2023                                         if (ia->ia_flags & IFA_ROUTE) {
2024                                                   /*
2025                                                    * Address with prefix route
2026                                                    * is prefered over others.
2027                                                    */
2028                                                   ia_if = ia;
2029                                         }
2030                               }
2031                     }
2032           }
2033 
2034           carp_deactivate_vhaddr(sc, vha, FALSE);
2035           if (!ia_if)
2036                     return ENOENT;
2037 
2038           ifp = ia_if->ia_ifp;
2039 
2040           /* XXX Don't allow parent iface to be changed */
2041           if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2042                     return EEXIST;
2043 
2044           return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2045 }
2046 
2047 static void
carp_add_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2048 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2049 {
2050           struct carp_vhaddr *vha_new;
2051           struct in_ifaddr *carp_ia;
2052 #ifdef INVARIANTS
2053           struct carp_vhaddr *vha;
2054 #endif
2055 
2056           KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2057           carp_ia = ifatoia(carp_ifa);
2058 
2059 #ifdef INVARIANTS
2060           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2061                     KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2062 #endif
2063 
2064           vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2065           vha_new->vha_ia = carp_ia;
2066           carp_insert_vhaddr(sc, vha_new);
2067 
2068           if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2069                     /*
2070                      * If the above configuration fails, it may only mean
2071                      * that the new address is problematic.  However, the
2072                      * carp(4) interface may already have several working
2073                      * addresses.  Since the expected behaviour of
2074                      * SIOC[AS]IFADDR is to put the NIC into working state,
2075                      * we try starting the state machine manually here with
2076                      * the hope that the carp(4)'s previously working
2077                      * addresses still could be brought up.
2078                      */
2079                     carp_hmac_prepare(sc);
2080                     carp_set_state(sc, INIT);
2081                     carp_setrun(sc, 0);
2082           }
2083 }
2084 
2085 static void
carp_del_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2086 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2087 {
2088           struct carp_vhaddr *vha;
2089           struct in_ifaddr *carp_ia;
2090 
2091           KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2092           carp_ia = ifatoia(carp_ifa);
2093 
2094           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2095                     KKASSERT(vha->vha_ia != NULL);
2096                     if (vha->vha_ia == carp_ia)
2097                               break;
2098           }
2099           KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2100 
2101           /*
2102            * Remove the vhaddr from the list before deactivating
2103            * the vhaddr, so that the HMAC could be correctly
2104            * updated in carp_deactivate_vhaddr()
2105            */
2106           carp_remove_vhaddr(sc, vha);
2107 
2108           carp_deactivate_vhaddr(sc, vha, FALSE);
2109           kfree(vha, M_CARP);
2110 }
2111 
2112 static void
carp_config_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2113 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2114 {
2115           struct carp_vhaddr *vha;
2116           struct in_ifaddr *carp_ia;
2117 
2118           KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2119           carp_ia = ifatoia(carp_ifa);
2120 
2121           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2122                     KKASSERT(vha->vha_ia != NULL);
2123                     if (vha->vha_ia == carp_ia)
2124                               break;
2125           }
2126           KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2127 
2128           /* Remove then reinsert, to keep the vhaddr list sorted */
2129           carp_remove_vhaddr(sc, vha);
2130           carp_insert_vhaddr(sc, vha);
2131 
2132           if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2133                     /* See the comment in carp_add_addr() */
2134                     carp_hmac_prepare(sc);
2135                     carp_set_state(sc, INIT);
2136                     carp_setrun(sc, 0);
2137           }
2138 }
2139 
2140 #ifdef notyet
2141 
2142 #ifdef INET6
2143 static int
carp_set_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)2144 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2145 {
2146           struct ifnet *ifp;
2147           struct carp_if *cif;
2148           struct in6_ifaddr *ia, *ia_if;
2149           struct ip6_moptions *im6o = &sc->sc_im6o;
2150           struct in6_multi_mship *imm;
2151           struct in6_addr in6;
2152           int own, error;
2153 
2154           if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2155                     carp_setrun(sc, 0);
2156                     return (0);
2157           }
2158 
2159           /* we have to do it by hands to check we won't match on us */
2160           ia_if = NULL; own = 0;
2161           for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2162                     int i;
2163 
2164                     for (i = 0; i < 4; i++) {
2165                               if ((sin6->sin6_addr.s6_addr32[i] &
2166                                   ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2167                                   (ia->ia_addr.sin6_addr.s6_addr32[i] &
2168                                   ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2169                                         break;
2170                     }
2171                     /* and, yeah, we need a multicast-capable iface too */
2172                     if (ia->ia_ifp != &sc->sc_if &&
2173                         (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2174                         (i == 4)) {
2175                               if (!ia_if)
2176                                         ia_if = ia;
2177                               if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2178                                   &ia->ia_addr.sin6_addr))
2179                                         own++;
2180                     }
2181           }
2182 
2183           if (!ia_if)
2184                     return (EADDRNOTAVAIL);
2185           ia = ia_if;
2186           ifp = ia->ia_ifp;
2187 
2188           if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2189               (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2190                     return (EADDRNOTAVAIL);
2191 
2192           if (!sc->sc_naddrs6) {
2193                     im6o->im6o_multicast_ifp = ifp;
2194 
2195                     /* join CARP multicast address */
2196                     bzero(&in6, sizeof(in6));
2197                     in6.s6_addr16[0] = htons(0xff02);
2198                     in6.s6_addr8[15] = 0x12;
2199                     if (in6_setscope(&in6, ifp, NULL) != 0)
2200                               goto cleanup;
2201                     if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2202                               goto cleanup;
2203                     LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2204 
2205                     /* join solicited multicast address */
2206                     bzero(&in6, sizeof(in6));
2207                     in6.s6_addr16[0] = htons(0xff02);
2208                     in6.s6_addr32[1] = 0;
2209                     in6.s6_addr32[2] = htonl(1);
2210                     in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2211                     in6.s6_addr8[12] = 0xff;
2212                     if (in6_setscope(&in6, ifp, NULL) != 0)
2213                               goto cleanup;
2214                     if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2215                               goto cleanup;
2216                     LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2217           }
2218 
2219 #ifdef foo
2220           if (!ifp->if_carp) {
2221                     cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2222 
2223                     if ((error = ifpromisc(ifp, 1))) {
2224                               kfree(cif, M_CARP);
2225                               goto cleanup;
2226                     }
2227 
2228                     TAILQ_INIT(&cif->vhif_vrs);
2229                     ifp->if_carp = cif;
2230           } else {
2231                     struct carp_softc *vr;
2232 
2233                     cif = ifp->if_carp;
2234                     TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2235                               if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2236                                         error = EINVAL;
2237                                         goto cleanup;
2238                               }
2239                     }
2240           }
2241 #endif
2242           sc->sc_ia6 = ia;
2243           sc->sc_carpdev = ifp;
2244 
2245 #ifdef foo
2246           { /* XXX prevent endless loop if already in queue */
2247           struct carp_softc *vr, *after = NULL;
2248           int myself = 0;
2249           cif = ifp->if_carp;
2250 
2251           TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2252                     if (vr == sc)
2253                               myself = 1;
2254                     if (vr->sc_vhid < sc->sc_vhid)
2255                               after = vr;
2256           }
2257 
2258           if (!myself) {
2259                     /* We're trying to keep things in order */
2260                     if (after == NULL)
2261                               TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2262                     else
2263                               TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2264           }
2265           }
2266 #endif
2267 
2268           sc->sc_naddrs6++;
2269           if (own)
2270                     sc->sc_advskew = 0;
2271           carp_sc_state(sc);
2272           carp_setrun(sc, 0);
2273 
2274           return (0);
2275 
2276 cleanup:
2277           /* clean up multicast memberships */
2278           if (!sc->sc_naddrs6) {
2279                     while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2280                               imm = LIST_FIRST(&im6o->im6o_memberships);
2281                               LIST_REMOVE(imm, i6mm_chain);
2282                               in6_leavegroup(imm);
2283                     }
2284           }
2285           return (error);
2286 }
2287 
2288 static int
carp_del_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)2289 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2290 {
2291           int error = 0;
2292 
2293           if (!--sc->sc_naddrs6) {
2294                     struct carp_if *cif = sc->sc_carpdev->if_carp;
2295                     struct ip6_moptions *im6o = &sc->sc_im6o;
2296 
2297                     callout_stop(&sc->sc_ad_tmo);
2298                     sc->sc_vhid = -1;
2299                     while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2300                               struct in6_multi_mship *imm =
2301                                   LIST_FIRST(&im6o->im6o_memberships);
2302 
2303                               LIST_REMOVE(imm, i6mm_chain);
2304                               in6_leavegroup(imm);
2305                     }
2306                     im6o->im6o_multicast_ifp = NULL;
2307 #ifdef foo
2308                     TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2309                     if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2310                               sc->sc_carpdev->if_carp = NULL;
2311                               kfree(cif, M_IFADDR);
2312                     }
2313 #endif
2314           }
2315           return (error);
2316 }
2317 #endif /* INET6 */
2318 
2319 #endif
2320 
2321 static int
carp_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr,struct ucred * cr)2322 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2323 {
2324           struct carp_softc *sc = ifp->if_softc;
2325           struct ifreq *ifr = (struct ifreq *)addr;
2326           struct ifdrv *ifd = (struct ifdrv *)addr;
2327           int error = 0;
2328 
2329           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2330 
2331           switch (cmd) {
2332           case SIOCSIFFLAGS:
2333                     if (ifp->if_flags & IFF_UP) {
2334                               if ((ifp->if_flags & IFF_RUNNING) == 0)
2335                                         carp_init(sc);
2336                     } else if (ifp->if_flags & IFF_RUNNING) {
2337                               carp_ioctl_stop(sc);
2338                     }
2339                     break;
2340 
2341           case SIOCSIFCAP:
2342                     carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2343                     break;
2344 
2345           case SIOCSVH:
2346                     error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2347                     break;
2348 
2349           case SIOCGVH:
2350                     error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2351                     break;
2352 
2353           case SIOCGDRVSPEC:
2354                     switch (ifd->ifd_cmd) {
2355                     case CARPGDEVNAME:
2356                               error = carp_ioctl_getdevname(sc, ifd);
2357                               break;
2358 
2359                     case CARPGVHADDR:
2360                               error = carp_ioctl_getvhaddr(sc, ifd);
2361                               break;
2362 
2363                     default:
2364                               error = EINVAL;
2365                               break;
2366                     }
2367                     break;
2368 
2369           default:
2370                     error = ether_ioctl(ifp, cmd, addr);
2371                     break;
2372           }
2373 
2374           return error;
2375 }
2376 
2377 static void
carp_ioctl_stop_dispatch(netmsg_t msg)2378 carp_ioctl_stop_dispatch(netmsg_t msg)
2379 {
2380           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2381           struct carp_softc *sc = cmsg->nc_softc;
2382 
2383           carp_stop(sc, FALSE);
2384           lwkt_replymsg(&cmsg->base.lmsg, 0);
2385 }
2386 
2387 static void
carp_ioctl_stop(struct carp_softc * sc)2388 carp_ioctl_stop(struct carp_softc *sc)
2389 {
2390           struct ifnet *ifp = &sc->arpcom.ac_if;
2391           struct netmsg_carp cmsg;
2392 
2393           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2394 
2395           ifnet_deserialize_all(ifp);
2396 
2397           bzero(&cmsg, sizeof(cmsg));
2398           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2399               carp_ioctl_stop_dispatch);
2400           cmsg.nc_softc = sc;
2401 
2402           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2403 
2404           ifnet_serialize_all(ifp);
2405 }
2406 
2407 static void
carp_ioctl_setvh_dispatch(netmsg_t msg)2408 carp_ioctl_setvh_dispatch(netmsg_t msg)
2409 {
2410           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2411           struct carp_softc *sc = cmsg->nc_softc;
2412           struct ifnet *ifp = &sc->arpcom.ac_if;
2413           const struct carpreq *carpr = cmsg->nc_data;
2414           int error;
2415 
2416           error = 1;
2417           if ((ifp->if_flags & IFF_RUNNING) &&
2418               sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2419                     switch (carpr->carpr_state) {
2420                     case BACKUP:
2421                               callout_stop(&sc->sc_ad_tmo);
2422                               carp_set_state(sc, BACKUP);
2423                               carp_setrun(sc, 0);
2424                               if (carp_opts[CARPCTL_SETROUTE])
2425                                         carp_setroute(sc, RTM_DELETE);
2426                               break;
2427 
2428                     case MASTER:
2429                               carp_master_down(sc);
2430                               break;
2431 
2432                     default:
2433                               break;
2434                     }
2435           }
2436           if (carpr->carpr_vhid > 0) {
2437                     if (carpr->carpr_vhid > 255) {
2438                               error = EINVAL;
2439                               goto back;
2440                     }
2441                     if (sc->sc_carpdev) {
2442                               struct carp_if *cif = sc->sc_carpdev->if_carp;
2443                               struct carp_softc_container *scc;
2444 
2445                               TAILQ_FOREACH(scc, cif, scc_link) {
2446                                         struct carp_softc *vr = scc->scc_softc;
2447 
2448                                         if (vr != sc &&
2449                                             vr->sc_vhid == carpr->carpr_vhid) {
2450                                                   error = EEXIST;
2451                                                   goto back;
2452                                         }
2453                               }
2454                     }
2455                     sc->sc_vhid = carpr->carpr_vhid;
2456 
2457                     IF_LLADDR(ifp)[5] = sc->sc_vhid;
2458                     bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2459                         ETHER_ADDR_LEN);
2460 
2461                     error--;
2462           }
2463           if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2464                     if (carpr->carpr_advskew >= 255) {
2465                               error = EINVAL;
2466                               goto back;
2467                     }
2468                     if (carpr->carpr_advbase > 255) {
2469                               error = EINVAL;
2470                               goto back;
2471                     }
2472                     sc->sc_advbase = carpr->carpr_advbase;
2473                     sc->sc_advskew = carpr->carpr_advskew;
2474                     error--;
2475           }
2476           bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2477           if (error > 0) {
2478                     error = EINVAL;
2479           } else {
2480                     error = 0;
2481                     carp_setrun(sc, 0);
2482           }
2483 back:
2484           carp_hmac_prepare(sc);
2485 
2486           lwkt_replymsg(&cmsg->base.lmsg, error);
2487 }
2488 
2489 static int
carp_ioctl_setvh(struct carp_softc * sc,void * udata,struct ucred * cr)2490 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2491 {
2492           struct ifnet *ifp = &sc->arpcom.ac_if;
2493           struct netmsg_carp cmsg;
2494           struct carpreq carpr;
2495           int error;
2496 
2497           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2498           ifnet_deserialize_all(ifp);
2499 
2500           error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2501                                             __SYSCAP_NULLCRED);
2502           if (error)
2503                     goto back;
2504 
2505           error = copyin(udata, &carpr, sizeof(carpr));
2506           if (error)
2507                     goto back;
2508 
2509           bzero(&cmsg, sizeof(cmsg));
2510           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2511               carp_ioctl_setvh_dispatch);
2512           cmsg.nc_softc = sc;
2513           cmsg.nc_data = &carpr;
2514 
2515           error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2516 
2517 back:
2518           ifnet_serialize_all(ifp);
2519           return error;
2520 }
2521 
2522 static void
carp_ioctl_ifcap_dispatch(netmsg_t msg)2523 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2524 {
2525           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2526           struct carp_softc *sc = cmsg->nc_softc;
2527           struct ifnet *ifp = &sc->arpcom.ac_if;
2528           int reqcap = *((const int *)(cmsg->nc_data));
2529           int mask;
2530 
2531           mask = reqcap ^ ifp->if_capenable;
2532           if (mask & IFCAP_TXCSUM) {
2533                     ifp->if_capenable ^= IFCAP_TXCSUM;
2534                     if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2535                         sc->sc_carpdev != NULL) {
2536                               ifp->if_hwassist |=
2537                                   (sc->sc_carpdev->if_hwassist &
2538                                    (CSUM_IP | CSUM_UDP | CSUM_TCP));
2539                     } else {
2540                               ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2541                     }
2542           }
2543           if (mask & IFCAP_TSO) {
2544                     ifp->if_capenable ^= IFCAP_TSO;
2545                     if ((ifp->if_capenable & IFCAP_TSO) &&
2546                         sc->sc_carpdev != NULL) {
2547                               ifp->if_hwassist |=
2548                                   (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2549                     } else {
2550                               ifp->if_hwassist &= ~CSUM_TSO;
2551                     }
2552           }
2553 
2554           lwkt_replymsg(&cmsg->base.lmsg, 0);
2555 }
2556 
2557 static void
carp_ioctl_ifcap(struct carp_softc * sc,int reqcap)2558 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2559 {
2560           struct ifnet *ifp = &sc->arpcom.ac_if;
2561           struct netmsg_carp cmsg;
2562 
2563           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2564           ifnet_deserialize_all(ifp);
2565 
2566           bzero(&cmsg, sizeof(cmsg));
2567           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2568               carp_ioctl_ifcap_dispatch);
2569           cmsg.nc_softc = sc;
2570           cmsg.nc_data = &reqcap;
2571 
2572           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2573 
2574           ifnet_serialize_all(ifp);
2575 }
2576 
2577 static void
carp_ioctl_getvh_dispatch(netmsg_t msg)2578 carp_ioctl_getvh_dispatch(netmsg_t msg)
2579 {
2580           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2581           struct carp_softc *sc = cmsg->nc_softc;
2582           struct carpreq *carpr = cmsg->nc_data;
2583 
2584           carpr->carpr_state = sc->sc_state;
2585           carpr->carpr_vhid = sc->sc_vhid;
2586           carpr->carpr_advbase = sc->sc_advbase;
2587           carpr->carpr_advskew = sc->sc_advskew;
2588           bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2589 
2590           lwkt_replymsg(&cmsg->base.lmsg, 0);
2591 }
2592 
2593 static int
carp_ioctl_getvh(struct carp_softc * sc,void * udata,struct ucred * cr)2594 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2595 {
2596           struct ifnet *ifp = &sc->arpcom.ac_if;
2597           struct netmsg_carp cmsg;
2598           struct carpreq carpr;
2599           int error;
2600 
2601           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2602           ifnet_deserialize_all(ifp);
2603 
2604           bzero(&cmsg, sizeof(cmsg));
2605           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2606               carp_ioctl_getvh_dispatch);
2607           cmsg.nc_softc = sc;
2608           cmsg.nc_data = &carpr;
2609 
2610           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2611 
2612           error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2613                                             __SYSCAP_NULLCRED);
2614           if (error)
2615                     bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2616 
2617           error = copyout(&carpr, udata, sizeof(carpr));
2618 
2619           ifnet_serialize_all(ifp);
2620           return error;
2621 }
2622 
2623 static void
carp_ioctl_getdevname_dispatch(netmsg_t msg)2624 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2625 {
2626           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2627           struct carp_softc *sc = cmsg->nc_softc;
2628           char *devname = cmsg->nc_data;
2629 
2630           bzero(devname, IFNAMSIZ);
2631           if (sc->sc_carpdev != NULL)
2632                     strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2633 
2634           lwkt_replymsg(&cmsg->base.lmsg, 0);
2635 }
2636 
2637 static int
carp_ioctl_getdevname(struct carp_softc * sc,struct ifdrv * ifd)2638 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2639 {
2640           struct ifnet *ifp = &sc->arpcom.ac_if;
2641           struct netmsg_carp cmsg;
2642           char devname[IFNAMSIZ];
2643           int error;
2644 
2645           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2646 
2647           if (ifd->ifd_len != sizeof(devname))
2648                     return EINVAL;
2649 
2650           ifnet_deserialize_all(ifp);
2651 
2652           bzero(&cmsg, sizeof(cmsg));
2653           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2654               carp_ioctl_getdevname_dispatch);
2655           cmsg.nc_softc = sc;
2656           cmsg.nc_data = devname;
2657 
2658           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2659 
2660           error = copyout(devname, ifd->ifd_data, sizeof(devname));
2661 
2662           ifnet_serialize_all(ifp);
2663           return error;
2664 }
2665 
2666 static void
carp_init_dispatch(netmsg_t msg)2667 carp_init_dispatch(netmsg_t msg)
2668 {
2669           struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2670           struct carp_softc *sc = cmsg->nc_softc;
2671 
2672           sc->sc_if.if_flags |= IFF_RUNNING;
2673           carp_hmac_prepare(sc);
2674           carp_set_state(sc, INIT);
2675           carp_setrun(sc, 0);
2676 
2677           lwkt_replymsg(&cmsg->base.lmsg, 0);
2678 }
2679 
2680 static void
carp_init(void * xsc)2681 carp_init(void *xsc)
2682 {
2683           struct carp_softc *sc = xsc;
2684           struct ifnet *ifp = &sc->arpcom.ac_if;
2685           struct netmsg_carp cmsg;
2686 
2687           ASSERT_IFNET_SERIALIZED_ALL(ifp);
2688 
2689           ifnet_deserialize_all(ifp);
2690 
2691           bzero(&cmsg, sizeof(cmsg));
2692           netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2693               carp_init_dispatch);
2694           cmsg.nc_softc = sc;
2695 
2696           lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2697 
2698           ifnet_serialize_all(ifp);
2699 }
2700 
2701 static int
carp_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)2702 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2703     struct rtentry *rt)
2704 {
2705           struct carp_softc *sc = ifp->if_softc;
2706           struct ifnet *carpdev;
2707           int error = 0;
2708 
2709           carpdev = sc->sc_carpdev;
2710           if (carpdev != NULL) {
2711                     if (m->m_flags & M_MCAST)
2712                               IFNET_STAT_INC(ifp, omcasts, 1);
2713                     IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2714                     IFNET_STAT_INC(ifp, opackets, 1);
2715 
2716                     /*
2717                      * NOTE:
2718                      * CARP's ifp is passed to backing device's
2719                      * if_output method.
2720                      */
2721                     carpdev->if_output(ifp, m, dst, rt);
2722           } else {
2723                     IFNET_STAT_INC(ifp, oerrors, 1);
2724                     m_freem(m);
2725                     error = ENETUNREACH;
2726           }
2727           return error;
2728 }
2729 
2730 /*
2731  * Start output on carp interface. This function should never be called.
2732  */
2733 static void
carp_start(struct ifnet * ifp,struct ifaltq_subque * ifsq __unused)2734 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2735 {
2736           panic("%s: start called", ifp->if_xname);
2737 }
2738 
2739 static void
carp_set_state(struct carp_softc * sc,int state)2740 carp_set_state(struct carp_softc *sc, int state)
2741 {
2742           struct ifnet *cifp = &sc->sc_if;
2743 
2744           if (sc->sc_state == state)
2745                     return;
2746           sc->sc_state = state;
2747 
2748           switch (sc->sc_state) {
2749           case BACKUP:
2750                     cifp->if_link_state = LINK_STATE_DOWN;
2751                     break;
2752 
2753           case MASTER:
2754                     cifp->if_link_state = LINK_STATE_UP;
2755                     break;
2756 
2757           default:
2758                     cifp->if_link_state = LINK_STATE_UNKNOWN;
2759                     break;
2760           }
2761           rt_ifmsg(cifp);
2762 }
2763 
2764 void
carp_group_demote_adj(struct ifnet * ifp,int adj)2765 carp_group_demote_adj(struct ifnet *ifp, int adj)
2766 {
2767           struct ifg_list     *ifgl;
2768           int *dm;
2769 
2770           TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2771                     if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2772                               continue;
2773                     dm = &ifgl->ifgl_group->ifg_carp_demoted;
2774 
2775                     if (*dm + adj >= 0)
2776                               *dm += adj;
2777                     else
2778                               *dm = 0;
2779 
2780                     if (adj > 0 && *dm == 1)
2781                               carp_send_ad_all();
2782                     CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2783                     ifgl->ifgl_group->ifg_group, *dm);
2784           }
2785 }
2786 
2787 #ifdef foo
2788 void
carp_carpdev_state(void * v)2789 carp_carpdev_state(void *v)
2790 {
2791           struct carp_if *cif = v;
2792           struct carp_softc *sc;
2793 
2794           TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2795                     carp_sc_state(sc);
2796 }
2797 
2798 static void
carp_sc_state(struct carp_softc * sc)2799 carp_sc_state(struct carp_softc *sc)
2800 {
2801           if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2802                     callout_stop(&sc->sc_ad_tmo);
2803                     callout_stop(&sc->sc_md_tmo);
2804                     callout_stop(&sc->sc_md6_tmo);
2805                     carp_set_state(sc, INIT);
2806                     carp_setrun(sc, 0);
2807                     if (!sc->sc_suppress) {
2808                               carp_suppress_preempt++;
2809                               if (carp_suppress_preempt == 1)
2810                                         carp_send_ad_all();
2811                     }
2812                     sc->sc_suppress = 1;
2813           } else {
2814                     carp_set_state(sc, INIT);
2815                     carp_setrun(sc, 0);
2816                     if (sc->sc_suppress)
2817                               carp_suppress_preempt--;
2818                     sc->sc_suppress = 0;
2819           }
2820 }
2821 #endif
2822 
2823 static void
carp_stop(struct carp_softc * sc,boolean_t detach)2824 carp_stop(struct carp_softc *sc, boolean_t detach)
2825 {
2826           sc->sc_if.if_flags &= ~IFF_RUNNING;
2827 
2828           callout_stop(&sc->sc_ad_tmo);
2829           callout_stop(&sc->sc_md_tmo);
2830           callout_stop(&sc->sc_md6_tmo);
2831 
2832           if (!detach && sc->sc_state == MASTER)
2833                     carp_send_ad(sc);
2834 
2835           if (sc->sc_suppress)
2836                     carp_suppress_preempt--;
2837           sc->sc_suppress = 0;
2838 
2839           if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2840                     carp_suppress_preempt--;
2841           sc->sc_sendad_errors = 0;
2842           sc->sc_sendad_success = 0;
2843 
2844           carp_set_state(sc, INIT);
2845           carp_setrun(sc, 0);
2846 }
2847 
2848 static void
carp_suspend(struct carp_softc * sc,boolean_t detach)2849 carp_suspend(struct carp_softc *sc, boolean_t detach)
2850 {
2851           struct ifnet *cifp = &sc->sc_if;
2852 
2853           carp_stop(sc, detach);
2854 
2855           /* Retain the running state, if we are not dead yet */
2856           if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2857                     cifp->if_flags |= IFF_RUNNING;
2858 }
2859 
2860 static int
carp_activate_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,struct ifnet * ifp,struct in_ifaddr * ia_if,int own)2861 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2862     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2863 {
2864           struct ip_moptions *imo = &sc->sc_imo;
2865           struct carp_if *ocif = ifp->if_carp;
2866           int error;
2867 
2868           KKASSERT(vha->vha_ia != NULL);
2869 
2870           KASSERT(ia_if != NULL, ("NULL backing address"));
2871           KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2872           KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2873                     ("inactive vhaddr %p is the address owner", vha));
2874 
2875           KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2876                     ("%s is already on %s", sc->sc_if.if_xname,
2877                      sc->sc_carpdev->if_xname));
2878 
2879           if (ocif == NULL) {
2880                     KASSERT(sc->sc_carpdev == NULL,
2881                               ("%s is already on %s", sc->sc_if.if_xname,
2882                                sc->sc_carpdev->if_xname));
2883 
2884                     error = ifpromisc(ifp, 1);
2885                     if (error)
2886                               return error;
2887           } else {
2888                     struct carp_softc_container *scc;
2889 
2890                     TAILQ_FOREACH(scc, ocif, scc_link) {
2891                               struct carp_softc *vr = scc->scc_softc;
2892 
2893                               if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2894                                         return EINVAL;
2895                     }
2896           }
2897 
2898           ifp->if_carp = carp_if_insert(ocif, sc);
2899           KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2900 
2901           sc->sc_ia = ia_if;
2902           sc->sc_carpdev = ifp;
2903           sc->arpcom.ac_if.if_hwassist = 0;
2904           if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2905                     sc->arpcom.ac_if.if_hwassist |=
2906                         (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2907           }
2908           if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2909                     sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2910 
2911           /*
2912            * Make sure that all protocol threads see the sc_carpdev and
2913            * if_carp changes
2914            */
2915           netmsg_service_sync();
2916 
2917           if (ocif != NULL && ifp->if_carp != ocif) {
2918                     /*
2919                      * The old carp list could be safely free now,
2920                      * since no one can access it.
2921                      */
2922                     carp_if_free(ocif);
2923           }
2924 
2925           vha->vha_iaback = ia_if;
2926           sc->sc_naddrs++;
2927 
2928           if (own) {
2929                     vha->vha_flags |= CARP_VHAF_OWNER;
2930 
2931                     /* XXX save user configured advskew? */
2932                     sc->sc_advskew = 0;
2933           }
2934 
2935           carp_addroute_vhaddr(sc, vha);
2936 
2937           /*
2938            * Join the multicast group only after the backing interface
2939            * has been hooked with the CARP interface.
2940            */
2941           KASSERT(imo->imo_multicast_ifp == NULL ||
2942                     imo->imo_multicast_ifp == &sc->sc_if,
2943                     ("%s didn't leave mcast group on %s",
2944                      sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2945 
2946           if (imo->imo_num_memberships == 0) {
2947                     struct in_addr addr;
2948 
2949                     addr.s_addr = htonl(INADDR_CARP_GROUP);
2950                     imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2951                     if (imo->imo_membership[0] == NULL) {
2952                               carp_deactivate_vhaddr(sc, vha, FALSE);
2953                               return ENOBUFS;
2954                     }
2955 
2956                     imo->imo_num_memberships++;
2957                     imo->imo_multicast_ifp = &sc->sc_if;
2958                     imo->imo_multicast_ttl = CARP_DFLTTL;
2959                     imo->imo_multicast_loop = 0;
2960           }
2961 
2962           carp_hmac_prepare(sc);
2963           carp_set_state(sc, INIT);
2964           carp_setrun(sc, 0);
2965           return 0;
2966 }
2967 
2968 static void
carp_deactivate_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,boolean_t del_iaback)2969 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2970     boolean_t del_iaback)
2971 {
2972           KKASSERT(vha->vha_ia != NULL);
2973 
2974           carp_hmac_prepare(sc);
2975 
2976           if (vha->vha_iaback == NULL) {
2977                     KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2978                               ("inactive vhaddr %p is the address owner", vha));
2979                     return;
2980           }
2981 
2982           vha->vha_flags &= ~CARP_VHAF_OWNER;
2983           carp_delroute_vhaddr(sc, vha, del_iaback);
2984 
2985           KKASSERT(sc->sc_naddrs > 0);
2986           vha->vha_iaback = NULL;
2987           sc->sc_naddrs--;
2988           if (!sc->sc_naddrs) {
2989                     if (sc->sc_naddrs6) {
2990                               carp_multicast_cleanup(sc);
2991                               sc->sc_ia = NULL;
2992                     } else {
2993                               carp_detach(sc, FALSE, del_iaback);
2994                     }
2995           }
2996 }
2997 
2998 static void
carp_link_addrs(struct carp_softc * sc,struct ifnet * ifp,struct ifaddr * ifa_if)2999 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
3000 {
3001           struct carp_vhaddr *vha;
3002           struct in_ifaddr *ia_if;
3003 
3004           KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3005           ia_if = ifatoia(ifa_if);
3006 
3007           /*
3008            * Test each inactive vhaddr against the newly added address.
3009            * If the newly added address could be the backing address,
3010            * then activate the matching vhaddr.
3011            */
3012           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3013                     const struct in_ifaddr *ia;
3014                     int own;
3015 
3016                     if (vha->vha_iaback != NULL)
3017                               continue;
3018 
3019                     ia = vha->vha_ia;
3020                     if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3021                         ia->ia_subnet != ia_if->ia_subnet)
3022                               continue;
3023 
3024                     own = 0;
3025                     if (ia->ia_addr.sin_addr.s_addr ==
3026                         ia_if->ia_addr.sin_addr.s_addr)
3027                               own = 1;
3028 
3029                     carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3030           }
3031 }
3032 
3033 static void
carp_unlink_addrs(struct carp_softc * sc,struct ifnet * ifp,struct ifaddr * ifa_if)3034 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3035                       struct ifaddr *ifa_if)
3036 {
3037           struct carp_vhaddr *vha;
3038           struct in_ifaddr *ia_if;
3039 
3040           KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3041           ia_if = ifatoia(ifa_if);
3042 
3043           /*
3044            * Ad src address is deleted; set it to NULL.
3045            * Following loop will try pick up a new ad src address
3046            * if one of the vhaddr could retain its backing address.
3047            */
3048           if (sc->sc_ia == ia_if)
3049                     sc->sc_ia = NULL;
3050 
3051           /*
3052            * Test each active vhaddr against the deleted address.
3053            * If the deleted address is vhaddr address's backing
3054            * address, then deactivate the vhaddr.
3055            */
3056           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3057                     if (vha->vha_iaback == NULL)
3058                               continue;
3059 
3060                     if (vha->vha_iaback == ia_if)
3061                               carp_deactivate_vhaddr(sc, vha, TRUE);
3062                     else if (sc->sc_ia == NULL)
3063                               sc->sc_ia = vha->vha_iaback;
3064           }
3065 }
3066 
3067 static void
carp_update_addrs(struct carp_softc * sc,struct ifaddr * ifa_del)3068 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3069 {
3070           struct carp_vhaddr *vha;
3071 
3072           KKASSERT(sc->sc_carpdev == NULL);
3073 
3074           TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3075                     carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3076 }
3077 
3078 static void
carp_ifaddr(void * arg __unused,struct ifnet * ifp,enum ifaddr_event event,struct ifaddr * ifa)3079 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3080               enum ifaddr_event event, struct ifaddr *ifa)
3081 {
3082           struct carp_softc *sc;
3083 
3084           if (ifa->ifa_addr->sa_family != AF_INET)
3085                     return;
3086 
3087           ASSERT_NETISR0;
3088 
3089           if (ifp->if_type == IFT_CARP) {
3090                     /*
3091                      * Address is changed on carp(4) interface
3092                      */
3093                     switch (event) {
3094                     case IFADDR_EVENT_ADD:
3095                               carp_add_addr(ifp->if_softc, ifa);
3096                               break;
3097 
3098                     case IFADDR_EVENT_CHANGE:
3099                               carp_config_addr(ifp->if_softc, ifa);
3100                               break;
3101 
3102                     case IFADDR_EVENT_DELETE:
3103                               carp_del_addr(ifp->if_softc, ifa);
3104                               break;
3105                     }
3106                     return;
3107           }
3108 
3109           /*
3110            * Address is changed on non-carp(4) interface
3111            */
3112           if ((ifp->if_flags & IFF_MULTICAST) == 0)
3113                     return;
3114 
3115           LIST_FOREACH(sc, &carpif_list, sc_next) {
3116                     if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3117                               /* Not the parent iface; skip */
3118                               continue;
3119                     }
3120 
3121                     switch (event) {
3122                     case IFADDR_EVENT_ADD:
3123                               carp_link_addrs(sc, ifp, ifa);
3124                               break;
3125 
3126                     case IFADDR_EVENT_DELETE:
3127                               if (sc->sc_carpdev != NULL) {
3128                                         carp_unlink_addrs(sc, ifp, ifa);
3129                                         if (sc->sc_carpdev == NULL) {
3130                                                   /*
3131                                                    * We no longer have the parent
3132                                                    * interface, however, certain
3133                                                    * virtual addresses, which are
3134                                                    * not used because they can't
3135                                                    * match the previous parent
3136                                                    * interface's addresses, may now
3137                                                    * match different interface's
3138                                                    * addresses.
3139                                                    */
3140                                                   carp_update_addrs(sc, ifa);
3141                                         }
3142                               } else {
3143                                         /*
3144                                          * The carp(4) interface didn't have a
3145                                          * parent iface, so it is not possible
3146                                          * that it will contain any address to
3147                                          * be unlinked.
3148                                          */
3149                               }
3150                               break;
3151 
3152                     case IFADDR_EVENT_CHANGE:
3153                               if (sc->sc_carpdev == NULL) {
3154                                         /*
3155                                          * The carp(4) interface didn't have a
3156                                          * parent iface, so it is not possible
3157                                          * that it will contain any address to
3158                                          * be updated.
3159                                          */
3160                                         carp_link_addrs(sc, ifp, ifa);
3161                               } else {
3162                                         /*
3163                                          * First try breaking tie with the old
3164                                          * address.  Then see whether we could
3165                                          * link certain vhaddr to the new address.
3166                                          * If that fails, i.e. carpdev is NULL,
3167                                          * we try a global update.
3168                                          *
3169                                          * NOTE: The above order is critical.
3170                                          */
3171                                         carp_unlink_addrs(sc, ifp, ifa);
3172                                         carp_link_addrs(sc, ifp, ifa);
3173                                         if (sc->sc_carpdev == NULL) {
3174                                                   /*
3175                                                    * See the comment in the above
3176                                                    * IFADDR_EVENT_DELETE block.
3177                                                    */
3178                                                   carp_update_addrs(sc, NULL);
3179                                         }
3180                               }
3181                               break;
3182                     }
3183           }
3184 }
3185 
3186 void
carp_proto_ctlinput(netmsg_t msg)3187 carp_proto_ctlinput(netmsg_t msg)
3188 {
3189           int cmd = msg->ctlinput.nm_cmd;
3190           struct sockaddr *sa = msg->ctlinput.nm_arg;
3191           struct in_ifaddr_container *iac;
3192 
3193           /* We only process PRC_IFDOWN and PRC_IFUP commands */
3194           if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3195                     goto done;
3196 
3197           TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3198                     struct in_ifaddr *ia = iac->ia;
3199                     struct ifnet *ifp = ia->ia_ifp;
3200 
3201                     if (ifp->if_type == IFT_CARP)
3202                               continue;
3203 
3204                     if (ia->ia_ifa.ifa_addr == sa) {
3205                               if (cmd == PRC_IFDOWN) {
3206                                         carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3207                                             &ia->ia_ifa);
3208                               } else if (cmd == PRC_IFUP) {
3209                                         carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3210                                             &ia->ia_ifa);
3211                               }
3212                               break;
3213                     }
3214           }
3215 done:
3216           lwkt_replymsg(&msg->lmsg, 0);
3217 }
3218 
3219 struct ifnet *
carp_parent(struct ifnet * cifp)3220 carp_parent(struct ifnet *cifp)
3221 {
3222           struct carp_softc *sc;
3223 
3224           KKASSERT(cifp->if_type == IFT_CARP);
3225           sc = cifp->if_softc;
3226 
3227           return sc->sc_carpdev;
3228 }
3229 
/*
 * Route flags for the in_ifaddr `x': RTF_HOST when the interface it
 * belongs to has IFF_LOOPBACK or IFF_POINTOPOINT set, otherwise 0.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) ? \
	    RTF_HOST : 0)
3233 
3234 static int
carp_addroute_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha)3235 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3236 {
3237           struct in_ifaddr *ia, *iaback;
3238 
3239           if (sc->sc_state != MASTER)
3240                     return 0;
3241 
3242           ia = vha->vha_ia;
3243           KKASSERT(ia != NULL);
3244 
3245           iaback = vha->vha_iaback;
3246           KKASSERT(iaback != NULL);
3247 
3248           return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3249 }
3250 
3251 static void
carp_delroute_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,boolean_t del_iaback)3252 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3253     boolean_t del_iaback)
3254 {
3255           struct in_ifaddr *ia, *iaback;
3256 
3257           ia = vha->vha_ia;
3258           KKASSERT(ia != NULL);
3259 
3260           iaback = vha->vha_iaback;
3261           KKASSERT(iaback != NULL);
3262 
3263           if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3264                     rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3265                     return;
3266           }
3267 
3268           rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3269           in_ifadown_force(&ia->ia_ifa, 1);
3270           ia->ia_flags &= ~IFA_ROUTE;
3271 }
3272 
3273 static int
carp_modevent(module_t mod,int type,void * data)3274 carp_modevent(module_t mod, int type, void *data)
3275 {
3276           switch (type) {
3277           case MOD_LOAD:
3278                     LIST_INIT(&carpif_list);
3279                     carp_ifdetach_event =
3280                     EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3281                                               EVENTHANDLER_PRI_ANY);
3282                     carp_ifaddr_event =
3283                     EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3284                                               EVENTHANDLER_PRI_FIRST);
3285                     if_clone_attach(&carp_cloner);
3286                     break;
3287 
3288           case MOD_UNLOAD:
3289                     EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3290                                                   carp_ifdetach_event);
3291                     EVENTHANDLER_DEREGISTER(ifaddr_event,
3292                                                   carp_ifaddr_event);
3293                     if_clone_detach(&carp_cloner);
3294                     break;
3295 
3296           default:
3297                     return (EINVAL);
3298           }
3299           return (0);
3300 }
3301 
3302 static moduledata_t carp_mod = {
3303           "carp",
3304           carp_modevent,
3305           0
3306 };
3307 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3308