1 /* $OpenBSD: session.c,v 1.510 2025/02/06 12:38:58 claudio Exp $ */
2
3 /*
4 * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5 * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <sys/types.h>
21
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49
/*
 * Fixed slots at the front of the pollfd array built in session_main().
 * The listener sockets start at PFD_LISTENERS_START and are followed by
 * the peer sockets, the mrt dump descriptors and the control connections.
 */
#define PFD_PIPE_MAIN		0	/* ibuf_main */
#define PFD_PIPE_ROUTE		1	/* ibuf_rde */
#define PFD_PIPE_ROUTE_CTL	2	/* ibuf_rde_ctl */
#define PFD_SOCK_CTL		3	/* csock */
#define PFD_SOCK_RCTL		4	/* rcsock */
#define PFD_LISTENERS_START	5
56
/* forward declarations */
void	session_sighdlr(int);
int	setup_listeners(u_int *);
void	init_peer(struct peer *);
void	start_timer_holdtime(struct peer *);
void	start_timer_sendholdtime(struct peer *);
void	start_timer_keepalive(struct peer *);
void	session_close_connection(struct peer *);
void	change_state(struct peer *, enum session_state, enum session_events);
int	session_setup_socket(struct peer *);
void	session_accept(int);
int	session_connect(struct peer *);
void	session_tcp_established(struct peer *);
int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
struct ibuf	*session_newmsg(enum msg_type, uint16_t);
void	session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
void	session_open(struct peer *);
void	session_keepalive(struct peer *);
void	session_update(uint32_t, struct ibuf *);
void	session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *);
void	session_notification_data(struct peer *, uint8_t, uint8_t, void *,
	    size_t);
void	session_rrefresh(struct peer *, uint8_t, uint8_t);
int	session_graceful_restart(struct peer *);
int	session_graceful_stop(struct peer *);
int	session_dispatch_msg(struct pollfd *, struct peer *);
void	session_process_msg(struct peer *);
struct ibuf	*parse_header(struct ibuf *, void *, int *);
int	parse_open(struct peer *, struct ibuf *);
int	parse_update(struct peer *, struct ibuf *);
int	parse_rrefresh(struct peer *, struct ibuf *);
void	parse_notification(struct peer *, struct ibuf *);
int	parse_capabilities(struct peer *, struct ibuf *, uint32_t *);
int	capa_neg_calc(struct peer *);
void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
void	session_up(struct peer *);
void	session_down(struct peer *);
int	imsg_rde(int, uint32_t, void *, uint16_t);
void	session_demote(struct peer *, int);
void	merge_peers(struct bgpd_config *, struct bgpd_config *);

int	la_cmp(struct listen_addr *, struct listen_addr *);
void	session_template_clone(struct peer *, struct sockaddr *,
	    uint32_t, uint32_t);
int	session_match_mask(struct peer *, struct bgpd_addr *);
101
/*
 * conf is the configuration the session engine runs with.
 * NOTE(review): nconf presumably holds a configuration being loaded during
 * a reload -- confirm against the imsg dispatcher.
 */
static struct bgpd_config	*conf, *nconf;
/* imsg channels polled in session_main(): RDE, RDE control and parent */
static struct imsgbuf		*ibuf_rde;
static struct imsgbuf		*ibuf_rde_ctl;
static struct imsgbuf		*ibuf_main;

struct bgpd_sysdep	 sysdep;
/* set by session_sighdlr(); ends the session_main() loop */
volatile sig_atomic_t	 session_quit;
/* while non-zero session_main() neither initializes nor deletes peers */
int			 pending_reconf;
/* control sockets handed to control_accept() */
int			 csock = -1, rcsock = -1;
/* peers set up by init_peer() and not yet removed */
u_int			 peer_cnt;

/* list of mrt dumps serviced by session_main() */
struct mrt_head		 mrthead;
/* getmonotime() of the last accept() fd exhaustion, 0 while not paused */
time_t			 pauseaccept;

/* all-ones marker; NOTE(review): presumably the BGP message header marker */
static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
120
121 static inline int
peer_compare(const struct peer * a,const struct peer * b)122 peer_compare(const struct peer *a, const struct peer *b)
123 {
124 return a->conf.id - b->conf.id;
125 }
126
127 RB_GENERATE(peer_head, peer, entry, peer_compare);
128
129 void
session_sighdlr(int sig)130 session_sighdlr(int sig)
131 {
132 switch (sig) {
133 case SIGINT:
134 case SIGTERM:
135 session_quit = 1;
136 break;
137 }
138 }
139
140 int
setup_listeners(u_int * la_cnt)141 setup_listeners(u_int *la_cnt)
142 {
143 int ttl = 255;
144 struct listen_addr *la;
145 u_int cnt = 0;
146
147 TAILQ_FOREACH(la, conf->listen_addrs, entry) {
148 la->reconf = RECONF_NONE;
149 cnt++;
150
151 if (la->flags & LISTENER_LISTENING)
152 continue;
153
154 if (la->fd == -1) {
155 log_warn("cannot establish listener on %s: invalid fd",
156 log_sockaddr((struct sockaddr *)&la->sa,
157 la->sa_len));
158 continue;
159 }
160
161 if (tcp_md5_prep_listener(la, &conf->peers) == -1)
162 fatal("tcp_md5_prep_listener");
163
164 /* set ttl to 255 so that ttl-security works */
165 if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
166 IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
167 log_warn("setup_listeners setsockopt TTL");
168 continue;
169 }
170 if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
171 IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
172 log_warn("setup_listeners setsockopt hoplimit");
173 continue;
174 }
175
176 if (listen(la->fd, MAX_BACKLOG)) {
177 close(la->fd);
178 fatal("listen");
179 }
180
181 la->flags |= LISTENER_LISTENING;
182
183 log_info("listening on %s",
184 log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
185 }
186
187 *la_cnt = cnt;
188
189 return (0);
190 }
191
192 void
session_main(int debug,int verbose)193 session_main(int debug, int verbose)
194 {
195 int timeout;
196 unsigned int i, j, idx_peers, idx_listeners, idx_mrts;
197 u_int pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
198 u_int listener_cnt, ctl_cnt, mrt_cnt;
199 u_int new_cnt;
200 struct passwd *pw;
201 struct peer *p, **peer_l = NULL, *next;
202 struct mrt *m, *xm, **mrt_l = NULL;
203 struct pollfd *pfd = NULL;
204 struct listen_addr *la;
205 void *newp;
206 time_t now;
207 short events;
208
209 log_init(debug, LOG_DAEMON);
210 log_setverbose(verbose);
211
212 log_procinit(log_procnames[PROC_SE]);
213
214 if ((pw = getpwnam(BGPD_USER)) == NULL)
215 fatal(NULL);
216
217 if (chroot(pw->pw_dir) == -1)
218 fatal("chroot");
219 if (chdir("/") == -1)
220 fatal("chdir(\"/\")");
221
222 setproctitle("session engine");
223
224 if (setgroups(1, &pw->pw_gid) ||
225 setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
226 setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
227 fatal("can't drop privileges");
228
229 if (pledge("stdio inet recvfd", NULL) == -1)
230 fatal("pledge");
231
232 signal(SIGTERM, session_sighdlr);
233 signal(SIGINT, session_sighdlr);
234 signal(SIGPIPE, SIG_IGN);
235 signal(SIGHUP, SIG_IGN);
236 signal(SIGALRM, SIG_IGN);
237 signal(SIGUSR1, SIG_IGN);
238
239 if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
240 fatal(NULL);
241 if (imsgbuf_init(ibuf_main, 3) == -1 ||
242 imsgbuf_set_maxsize(ibuf_main, MAX_BGPD_IMSGSIZE) == -1)
243 fatal(NULL);
244 imsgbuf_allow_fdpass(ibuf_main);
245
246 LIST_INIT(&mrthead);
247 listener_cnt = 0;
248 peer_cnt = 0;
249 ctl_cnt = 0;
250
251 conf = new_config();
252 log_info("session engine ready");
253
254 while (session_quit == 0) {
255 /* check for peers to be initialized or deleted */
256 if (!pending_reconf) {
257 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
258 /* new peer that needs init? */
259 if (p->state == STATE_NONE)
260 init_peer(p);
261
262 /* deletion due? */
263 if (p->reconf_action == RECONF_DELETE) {
264 if (p->demoted)
265 session_demote(p, -1);
266 p->conf.demote_group[0] = 0;
267 session_stop(p, ERR_CEASE_PEER_UNCONF,
268 NULL);
269 timer_remove_all(&p->timers);
270 tcp_md5_del_listener(conf, p);
271 if (imsg_rde(IMSG_SESSION_DELETE,
272 p->conf.id, NULL, 0) == -1)
273 fatalx("imsg_compose error");
274 msgbuf_free(p->wbuf);
275 RB_REMOVE(peer_head, &conf->peers, p);
276 log_peer_warnx(&p->conf, "removed");
277 free(p);
278 peer_cnt--;
279 continue;
280 }
281 p->reconf_action = RECONF_NONE;
282 }
283 }
284
285 if (peer_cnt > peer_l_elms) {
286 if ((newp = reallocarray(peer_l, peer_cnt,
287 sizeof(struct peer *))) == NULL) {
288 /* panic for now */
289 log_warn("could not resize peer_l from %u -> %u"
290 " entries", peer_l_elms, peer_cnt);
291 fatalx("exiting");
292 }
293 peer_l = newp;
294 peer_l_elms = peer_cnt;
295 }
296
297 mrt_cnt = 0;
298 for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
299 xm = LIST_NEXT(m, entry);
300 if (m->state == MRT_STATE_REMOVE) {
301 mrt_clean(m);
302 LIST_REMOVE(m, entry);
303 free(m);
304 continue;
305 }
306 if (msgbuf_queuelen(m->wbuf) > 0)
307 mrt_cnt++;
308 }
309
310 if (mrt_cnt > mrt_l_elms) {
311 if ((newp = reallocarray(mrt_l, mrt_cnt,
312 sizeof(struct mrt *))) == NULL) {
313 /* panic for now */
314 log_warn("could not resize mrt_l from %u -> %u"
315 " entries", mrt_l_elms, mrt_cnt);
316 fatalx("exiting");
317 }
318 mrt_l = newp;
319 mrt_l_elms = mrt_cnt;
320 }
321
322 new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
323 ctl_cnt + mrt_cnt;
324 if (new_cnt > pfd_elms) {
325 if ((newp = reallocarray(pfd, new_cnt,
326 sizeof(struct pollfd))) == NULL) {
327 /* panic for now */
328 log_warn("could not resize pfd from %u -> %u"
329 " entries", pfd_elms, new_cnt);
330 fatalx("exiting");
331 }
332 pfd = newp;
333 pfd_elms = new_cnt;
334 }
335
336 memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
337
338 set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
339 set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
340 set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
341
342 if (pauseaccept == 0) {
343 pfd[PFD_SOCK_CTL].fd = csock;
344 pfd[PFD_SOCK_CTL].events = POLLIN;
345 pfd[PFD_SOCK_RCTL].fd = rcsock;
346 pfd[PFD_SOCK_RCTL].events = POLLIN;
347 } else {
348 pfd[PFD_SOCK_CTL].fd = -1;
349 pfd[PFD_SOCK_RCTL].fd = -1;
350 }
351
352 i = PFD_LISTENERS_START;
353 TAILQ_FOREACH(la, conf->listen_addrs, entry) {
354 if (pauseaccept == 0) {
355 pfd[i].fd = la->fd;
356 pfd[i].events = POLLIN;
357 } else
358 pfd[i].fd = -1;
359 i++;
360 }
361 idx_listeners = i;
362 timeout = 240; /* loop every 240s at least */
363
364 now = getmonotime();
365 RB_FOREACH(p, peer_head, &conf->peers) {
366 time_t nextaction;
367 struct timer *pt;
368
369 /* check timers */
370 if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
371 switch (pt->type) {
372 case Timer_Hold:
373 bgp_fsm(p, EVNT_TIMER_HOLDTIME, NULL);
374 break;
375 case Timer_SendHold:
376 bgp_fsm(p, EVNT_TIMER_SENDHOLD, NULL);
377 break;
378 case Timer_ConnectRetry:
379 bgp_fsm(p, EVNT_TIMER_CONNRETRY, NULL);
380 break;
381 case Timer_Keepalive:
382 bgp_fsm(p, EVNT_TIMER_KEEPALIVE, NULL);
383 break;
384 case Timer_IdleHold:
385 bgp_fsm(p, EVNT_START, NULL);
386 break;
387 case Timer_IdleHoldReset:
388 p->IdleHoldTime =
389 INTERVAL_IDLE_HOLD_INITIAL;
390 p->errcnt = 0;
391 timer_stop(&p->timers,
392 Timer_IdleHoldReset);
393 break;
394 case Timer_CarpUndemote:
395 timer_stop(&p->timers,
396 Timer_CarpUndemote);
397 if (p->demoted &&
398 p->state == STATE_ESTABLISHED)
399 session_demote(p, -1);
400 break;
401 case Timer_RestartTimeout:
402 timer_stop(&p->timers,
403 Timer_RestartTimeout);
404 session_graceful_stop(p);
405 break;
406 case Timer_SessionDown:
407 timer_stop(&p->timers,
408 Timer_SessionDown);
409
410 if (imsg_rde(IMSG_SESSION_DELETE,
411 p->conf.id, NULL, 0) == -1)
412 fatalx("imsg_compose error");
413 p->rdesession = 0;
414
415 /* finally delete this cloned peer */
416 if (p->template)
417 p->reconf_action =
418 RECONF_DELETE;
419 break;
420 default:
421 fatalx("King Bula lost in time");
422 }
423 }
424 if ((nextaction = timer_nextduein(&p->timers,
425 now)) != -1 && nextaction < timeout)
426 timeout = nextaction;
427
428 /* are we waiting for a write? */
429 events = POLLIN;
430 if (msgbuf_queuelen(p->wbuf) > 0 ||
431 p->state == STATE_CONNECT)
432 events |= POLLOUT;
433 /* is there still work to do? */
434 if (p->rpending)
435 timeout = 0;
436
437 /* poll events */
438 if (p->fd != -1 && events != 0) {
439 pfd[i].fd = p->fd;
440 pfd[i].events = events;
441 peer_l[i - idx_listeners] = p;
442 i++;
443 }
444 }
445
446 idx_peers = i;
447
448 LIST_FOREACH(m, &mrthead, entry)
449 if (msgbuf_queuelen(m->wbuf) > 0) {
450 pfd[i].fd = m->fd;
451 pfd[i].events = POLLOUT;
452 mrt_l[i - idx_peers] = m;
453 i++;
454 }
455
456 idx_mrts = i;
457
458 i += control_fill_pfds(pfd + i, pfd_elms -i);
459
460 if (i > pfd_elms)
461 fatalx("poll pfd overflow");
462
463 if (pauseaccept && timeout > 1)
464 timeout = 1;
465 if (timeout < 0)
466 timeout = 0;
467 if (poll(pfd, i, timeout * 1000) == -1) {
468 if (errno == EINTR)
469 continue;
470 fatal("poll error");
471 }
472
473 /*
474 * If we previously saw fd exhaustion, we stop accept()
475 * for 1 second to throttle the accept() loop.
476 */
477 if (pauseaccept && getmonotime() > pauseaccept + 1)
478 pauseaccept = 0;
479
480 if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
481 log_warnx("SE: Lost connection to parent");
482 session_quit = 1;
483 continue;
484 } else
485 session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
486 &listener_cnt);
487
488 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
489 log_warnx("SE: Lost connection to RDE");
490 imsgbuf_clear(ibuf_rde);
491 free(ibuf_rde);
492 ibuf_rde = NULL;
493 } else
494 session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
495 &listener_cnt);
496
497 if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
498 -1) {
499 log_warnx("SE: Lost connection to RDE control");
500 imsgbuf_clear(ibuf_rde_ctl);
501 free(ibuf_rde_ctl);
502 ibuf_rde_ctl = NULL;
503 } else
504 session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
505 &listener_cnt);
506
507 if (pfd[PFD_SOCK_CTL].revents & POLLIN)
508 ctl_cnt += control_accept(csock, 0);
509
510 if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
511 ctl_cnt += control_accept(rcsock, 1);
512
513 for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
514 if (pfd[j].revents & POLLIN)
515 session_accept(pfd[j].fd);
516
517 for (; j < idx_peers; j++)
518 session_dispatch_msg(&pfd[j],
519 peer_l[j - idx_listeners]);
520
521 RB_FOREACH(p, peer_head, &conf->peers)
522 session_process_msg(p);
523
524 for (; j < idx_mrts; j++)
525 if (pfd[j].revents & POLLOUT)
526 mrt_write(mrt_l[j - idx_peers]);
527
528 for (; j < i; j++)
529 ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
530 }
531
532 RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
533 session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down");
534 timer_remove_all(&p->timers);
535 tcp_md5_del_listener(conf, p);
536 RB_REMOVE(peer_head, &conf->peers, p);
537 free(p);
538 }
539
540 while ((m = LIST_FIRST(&mrthead)) != NULL) {
541 mrt_clean(m);
542 LIST_REMOVE(m, entry);
543 free(m);
544 }
545
546 free_config(conf);
547 free(peer_l);
548 free(mrt_l);
549 free(pfd);
550
551 /* close pipes */
552 if (ibuf_rde) {
553 imsgbuf_write(ibuf_rde);
554 imsgbuf_clear(ibuf_rde);
555 close(ibuf_rde->fd);
556 free(ibuf_rde);
557 }
558 if (ibuf_rde_ctl) {
559 imsgbuf_clear(ibuf_rde_ctl);
560 close(ibuf_rde_ctl->fd);
561 free(ibuf_rde_ctl);
562 }
563 imsgbuf_write(ibuf_main);
564 imsgbuf_clear(ibuf_main);
565 close(ibuf_main->fd);
566 free(ibuf_main);
567
568 control_shutdown(csock);
569 control_shutdown(rcsock);
570 log_info("session engine exiting");
571 exit(0);
572 }
573
574 void
init_peer(struct peer * p)575 init_peer(struct peer *p)
576 {
577 TAILQ_INIT(&p->timers);
578 p->fd = -1;
579 if (p->wbuf != NULL)
580 fatalx("%s: msgbuf already set", __func__);
581 if ((p->wbuf = msgbuf_new_reader(MSGSIZE_HEADER, parse_header, p)) ==
582 NULL)
583 fatal(NULL);
584
585 if (p->conf.if_depend[0])
586 imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
587 p->conf.if_depend, sizeof(p->conf.if_depend));
588 else
589 p->depend_ok = 1;
590
591 peer_cnt++;
592
593 change_state(p, STATE_IDLE, EVNT_NONE);
594 if (p->conf.down)
595 timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
596 else
597 timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY);
598
599 p->stats.last_updown = getmonotime();
600
601 /*
602 * on startup, demote if requested.
603 * do not handle new peers. they must reach ESTABLISHED beforehand.
604 * peers added at runtime have reconf_action set to RECONF_REINIT.
605 */
606 if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
607 session_demote(p, +1);
608 }
609
610 void
bgp_fsm(struct peer * peer,enum session_events event,struct ibuf * msg)611 bgp_fsm(struct peer *peer, enum session_events event, struct ibuf *msg)
612 {
613 switch (peer->state) {
614 case STATE_NONE:
615 /* nothing */
616 break;
617 case STATE_IDLE:
618 switch (event) {
619 case EVNT_START:
620 timer_stop(&peer->timers, Timer_Hold);
621 timer_stop(&peer->timers, Timer_SendHold);
622 timer_stop(&peer->timers, Timer_Keepalive);
623 timer_stop(&peer->timers, Timer_IdleHold);
624
625 if (!peer->depend_ok)
626 timer_stop(&peer->timers, Timer_ConnectRetry);
627 else if (peer->passive || peer->conf.passive ||
628 peer->conf.template) {
629 change_state(peer, STATE_ACTIVE, event);
630 timer_stop(&peer->timers, Timer_ConnectRetry);
631 } else {
632 change_state(peer, STATE_CONNECT, event);
633 timer_set(&peer->timers, Timer_ConnectRetry,
634 conf->connectretry);
635 session_connect(peer);
636 }
637 peer->passive = 0;
638 break;
639 case EVNT_STOP:
640 timer_stop(&peer->timers, Timer_IdleHold);
641 break;
642 default:
643 /* ignore */
644 break;
645 }
646 break;
647 case STATE_CONNECT:
648 switch (event) {
649 case EVNT_START:
650 /* ignore */
651 break;
652 case EVNT_CON_OPEN:
653 session_tcp_established(peer);
654 session_open(peer);
655 timer_stop(&peer->timers, Timer_ConnectRetry);
656 peer->holdtime = INTERVAL_HOLD_INITIAL;
657 start_timer_holdtime(peer);
658 change_state(peer, STATE_OPENSENT, event);
659 break;
660 case EVNT_CON_OPENFAIL:
661 timer_set(&peer->timers, Timer_ConnectRetry,
662 conf->connectretry);
663 session_close_connection(peer);
664 change_state(peer, STATE_ACTIVE, event);
665 break;
666 case EVNT_TIMER_CONNRETRY:
667 timer_set(&peer->timers, Timer_ConnectRetry,
668 conf->connectretry);
669 session_connect(peer);
670 break;
671 default:
672 change_state(peer, STATE_IDLE, event);
673 break;
674 }
675 break;
676 case STATE_ACTIVE:
677 switch (event) {
678 case EVNT_START:
679 /* ignore */
680 break;
681 case EVNT_CON_OPEN:
682 session_tcp_established(peer);
683 session_open(peer);
684 timer_stop(&peer->timers, Timer_ConnectRetry);
685 peer->holdtime = INTERVAL_HOLD_INITIAL;
686 start_timer_holdtime(peer);
687 change_state(peer, STATE_OPENSENT, event);
688 break;
689 case EVNT_CON_OPENFAIL:
690 timer_set(&peer->timers, Timer_ConnectRetry,
691 conf->connectretry);
692 session_close_connection(peer);
693 change_state(peer, STATE_ACTIVE, event);
694 break;
695 case EVNT_TIMER_CONNRETRY:
696 timer_set(&peer->timers, Timer_ConnectRetry,
697 peer->holdtime);
698 change_state(peer, STATE_CONNECT, event);
699 session_connect(peer);
700 break;
701 default:
702 change_state(peer, STATE_IDLE, event);
703 break;
704 }
705 break;
706 case STATE_OPENSENT:
707 switch (event) {
708 case EVNT_START:
709 /* ignore */
710 break;
711 case EVNT_STOP:
712 change_state(peer, STATE_IDLE, event);
713 break;
714 case EVNT_CON_CLOSED:
715 session_close_connection(peer);
716 timer_set(&peer->timers, Timer_ConnectRetry,
717 conf->connectretry);
718 change_state(peer, STATE_ACTIVE, event);
719 break;
720 case EVNT_CON_FATAL:
721 change_state(peer, STATE_IDLE, event);
722 break;
723 case EVNT_TIMER_HOLDTIME:
724 session_notification(peer, ERR_HOLDTIMEREXPIRED,
725 0, NULL);
726 change_state(peer, STATE_IDLE, event);
727 break;
728 case EVNT_TIMER_SENDHOLD:
729 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
730 0, NULL);
731 change_state(peer, STATE_IDLE, event);
732 break;
733 case EVNT_RCVD_OPEN:
734 /* parse_open calls change_state itself on failure */
735 if (parse_open(peer, msg))
736 break;
737 session_keepalive(peer);
738 change_state(peer, STATE_OPENCONFIRM, event);
739 break;
740 case EVNT_RCVD_NOTIFICATION:
741 parse_notification(peer, msg);
742 break;
743 default:
744 session_notification(peer,
745 ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL);
746 change_state(peer, STATE_IDLE, event);
747 break;
748 }
749 break;
750 case STATE_OPENCONFIRM:
751 switch (event) {
752 case EVNT_START:
753 /* ignore */
754 break;
755 case EVNT_STOP:
756 change_state(peer, STATE_IDLE, event);
757 break;
758 case EVNT_CON_CLOSED:
759 case EVNT_CON_FATAL:
760 change_state(peer, STATE_IDLE, event);
761 break;
762 case EVNT_TIMER_HOLDTIME:
763 session_notification(peer, ERR_HOLDTIMEREXPIRED,
764 0, NULL);
765 change_state(peer, STATE_IDLE, event);
766 break;
767 case EVNT_TIMER_SENDHOLD:
768 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
769 0, NULL);
770 change_state(peer, STATE_IDLE, event);
771 break;
772 case EVNT_TIMER_KEEPALIVE:
773 session_keepalive(peer);
774 break;
775 case EVNT_RCVD_KEEPALIVE:
776 start_timer_holdtime(peer);
777 change_state(peer, STATE_ESTABLISHED, event);
778 break;
779 case EVNT_RCVD_NOTIFICATION:
780 parse_notification(peer, msg);
781 break;
782 default:
783 session_notification(peer,
784 ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL);
785 change_state(peer, STATE_IDLE, event);
786 break;
787 }
788 break;
789 case STATE_ESTABLISHED:
790 switch (event) {
791 case EVNT_START:
792 /* ignore */
793 break;
794 case EVNT_STOP:
795 change_state(peer, STATE_IDLE, event);
796 break;
797 case EVNT_CON_CLOSED:
798 case EVNT_CON_FATAL:
799 change_state(peer, STATE_IDLE, event);
800 break;
801 case EVNT_TIMER_HOLDTIME:
802 session_notification(peer, ERR_HOLDTIMEREXPIRED,
803 0, NULL);
804 change_state(peer, STATE_IDLE, event);
805 break;
806 case EVNT_TIMER_SENDHOLD:
807 session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
808 0, NULL);
809 change_state(peer, STATE_IDLE, event);
810 break;
811 case EVNT_TIMER_KEEPALIVE:
812 session_keepalive(peer);
813 break;
814 case EVNT_RCVD_KEEPALIVE:
815 start_timer_holdtime(peer);
816 break;
817 case EVNT_RCVD_UPDATE:
818 start_timer_holdtime(peer);
819 if (parse_update(peer, msg))
820 change_state(peer, STATE_IDLE, event);
821 else
822 start_timer_holdtime(peer);
823 break;
824 case EVNT_RCVD_NOTIFICATION:
825 parse_notification(peer, msg);
826 break;
827 default:
828 session_notification(peer,
829 ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL);
830 change_state(peer, STATE_IDLE, event);
831 break;
832 }
833 break;
834 }
835 }
836
837 void
start_timer_holdtime(struct peer * peer)838 start_timer_holdtime(struct peer *peer)
839 {
840 if (peer->holdtime > 0)
841 timer_set(&peer->timers, Timer_Hold, peer->holdtime);
842 else
843 timer_stop(&peer->timers, Timer_Hold);
844 }
845
846 void
start_timer_sendholdtime(struct peer * peer)847 start_timer_sendholdtime(struct peer *peer)
848 {
849 uint16_t holdtime = INTERVAL_HOLD;
850
851 if (peer->holdtime > INTERVAL_HOLD)
852 holdtime = peer->holdtime;
853
854 if (peer->holdtime > 0)
855 timer_set(&peer->timers, Timer_SendHold, holdtime);
856 else
857 timer_stop(&peer->timers, Timer_SendHold);
858 }
859
860 void
start_timer_keepalive(struct peer * peer)861 start_timer_keepalive(struct peer *peer)
862 {
863 if (peer->holdtime > 0)
864 timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
865 else
866 timer_stop(&peer->timers, Timer_Keepalive);
867 }
868
869 void
session_close_connection(struct peer * peer)870 session_close_connection(struct peer *peer)
871 {
872 if (peer->fd != -1) {
873 close(peer->fd);
874 pauseaccept = 0;
875 }
876 peer->fd = -1;
877 }
878
879 void
change_state(struct peer * peer,enum session_state state,enum session_events event)880 change_state(struct peer *peer, enum session_state state,
881 enum session_events event)
882 {
883 struct mrt *mrt;
884
885 switch (state) {
886 case STATE_IDLE:
887 /* carp demotion first. new peers handled in init_peer */
888 if (peer->state == STATE_ESTABLISHED &&
889 peer->conf.demote_group[0] && !peer->demoted)
890 session_demote(peer, +1);
891
892 /*
893 * try to write out what's buffered (maybe a notification),
894 * don't bother if it fails
895 */
896 if (peer->state >= STATE_OPENSENT &&
897 msgbuf_queuelen(peer->wbuf) > 0)
898 ibuf_write(peer->fd, peer->wbuf);
899
900 /*
901 * we must start the timer for the next EVNT_START
902 * if we are coming here due to an error and the
903 * session was not established successfully before, the
904 * starttimerinterval needs to be exponentially increased
905 */
906 if (peer->IdleHoldTime == 0)
907 peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
908 peer->holdtime = INTERVAL_HOLD_INITIAL;
909 timer_stop(&peer->timers, Timer_ConnectRetry);
910 timer_stop(&peer->timers, Timer_Keepalive);
911 timer_stop(&peer->timers, Timer_Hold);
912 timer_stop(&peer->timers, Timer_SendHold);
913 timer_stop(&peer->timers, Timer_IdleHold);
914 timer_stop(&peer->timers, Timer_IdleHoldReset);
915 session_close_connection(peer);
916 msgbuf_clear(peer->wbuf);
917 peer->rpending = 0;
918 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
919 if (!peer->template)
920 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
921 peer->conf.id, 0, -1, NULL, 0);
922
923 if (peer->state == STATE_ESTABLISHED) {
924 if (peer->capa.neg.grestart.restart == 2 &&
925 (event == EVNT_CON_CLOSED ||
926 event == EVNT_CON_FATAL ||
927 (peer->capa.neg.grestart.grnotification &&
928 (event == EVNT_RCVD_GRACE_NOTIFICATION ||
929 event == EVNT_TIMER_HOLDTIME ||
930 event == EVNT_TIMER_SENDHOLD)))) {
931 /* don't punish graceful restart */
932 timer_set(&peer->timers, Timer_IdleHold, 0);
933 session_graceful_restart(peer);
934 } else if (event != EVNT_STOP) {
935 timer_set(&peer->timers, Timer_IdleHold,
936 peer->IdleHoldTime);
937 if (event != EVNT_NONE &&
938 peer->IdleHoldTime < MAX_IDLE_HOLD/2)
939 peer->IdleHoldTime *= 2;
940 session_down(peer);
941 } else {
942 session_down(peer);
943 }
944 } else if (event != EVNT_STOP) {
945 timer_set(&peer->timers, Timer_IdleHold,
946 peer->IdleHoldTime);
947 if (event != EVNT_NONE &&
948 peer->IdleHoldTime < MAX_IDLE_HOLD / 2)
949 peer->IdleHoldTime *= 2;
950 }
951
952 if (peer->state == STATE_NONE ||
953 peer->state == STATE_ESTABLISHED) {
954 /* initialize capability negotiation structures */
955 memcpy(&peer->capa.ann, &peer->conf.capabilities,
956 sizeof(peer->capa.ann));
957 }
958 break;
959 case STATE_CONNECT:
960 if (peer->state == STATE_ESTABLISHED &&
961 peer->capa.neg.grestart.restart == 2) {
962 /* do the graceful restart dance */
963 session_graceful_restart(peer);
964 peer->holdtime = INTERVAL_HOLD_INITIAL;
965 timer_stop(&peer->timers, Timer_ConnectRetry);
966 timer_stop(&peer->timers, Timer_Keepalive);
967 timer_stop(&peer->timers, Timer_Hold);
968 timer_stop(&peer->timers, Timer_SendHold);
969 timer_stop(&peer->timers, Timer_IdleHold);
970 timer_stop(&peer->timers, Timer_IdleHoldReset);
971 session_close_connection(peer);
972 msgbuf_clear(peer->wbuf);
973 memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
974 }
975 break;
976 case STATE_ACTIVE:
977 if (!peer->template)
978 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
979 peer->conf.id, 0, -1, NULL, 0);
980 break;
981 case STATE_OPENSENT:
982 break;
983 case STATE_OPENCONFIRM:
984 break;
985 case STATE_ESTABLISHED:
986 timer_set(&peer->timers, Timer_IdleHoldReset,
987 peer->IdleHoldTime);
988 if (peer->demoted)
989 timer_set(&peer->timers, Timer_CarpUndemote,
990 INTERVAL_HOLD_DEMOTED);
991 session_up(peer);
992 break;
993 default: /* something seriously fucked */
994 break;
995 }
996
997 log_statechange(peer, state, event);
998 LIST_FOREACH(mrt, &mrthead, entry) {
999 if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
1000 continue;
1001 if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1002 mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1003 mrt->group_id == peer->conf.groupid))
1004 mrt_dump_state(mrt, peer->state, state, peer);
1005 }
1006 peer->prev_state = peer->state;
1007 peer->state = state;
1008 }
1009
1010 void
session_accept(int listenfd)1011 session_accept(int listenfd)
1012 {
1013 int connfd;
1014 socklen_t len;
1015 struct sockaddr_storage cliaddr;
1016 struct peer *p = NULL;
1017
1018 len = sizeof(cliaddr);
1019 if ((connfd = accept4(listenfd,
1020 (struct sockaddr *)&cliaddr, &len,
1021 SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
1022 if (errno == ENFILE || errno == EMFILE)
1023 pauseaccept = getmonotime();
1024 else if (errno != EWOULDBLOCK && errno != EINTR &&
1025 errno != ECONNABORTED)
1026 log_warn("accept");
1027 return;
1028 }
1029
1030 p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);
1031
1032 if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
1033 if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
1034 /* fast reconnect after clear */
1035 p->passive = 1;
1036 bgp_fsm(p, EVNT_START, NULL);
1037 }
1038 }
1039
1040 if (p != NULL &&
1041 (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
1042 if (p->fd != -1) {
1043 if (p->state == STATE_CONNECT)
1044 session_close_connection(p);
1045 else {
1046 close(connfd);
1047 return;
1048 }
1049 }
1050
1051 open:
1052 if (p->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) {
1053 log_peer_warnx(&p->conf,
1054 "ipsec or md5sig configured but not available");
1055 close(connfd);
1056 return;
1057 }
1058
1059 if (tcp_md5_check(connfd, &p->auth_conf) == -1) {
1060 log_peer_warn(&p->conf, "check md5sig");
1061 close(connfd);
1062 return;
1063 }
1064 p->fd = connfd;
1065 if (session_setup_socket(p)) {
1066 close(connfd);
1067 return;
1068 }
1069 bgp_fsm(p, EVNT_CON_OPEN, NULL);
1070 return;
1071 } else if (p != NULL && p->state == STATE_ESTABLISHED &&
1072 p->capa.neg.grestart.restart == 2) {
1073 /* first do the graceful restart dance */
1074 change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
1075 /* then do part of the open dance */
1076 goto open;
1077 } else {
1078 log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
1079 close(connfd);
1080 }
1081 }
1082
1083 int
session_connect(struct peer * peer)1084 session_connect(struct peer *peer)
1085 {
1086 struct sockaddr *sa;
1087 struct bgpd_addr *bind_addr;
1088 socklen_t sa_len;
1089
1090 /*
1091 * we do not need the overcomplicated collision detection RFC 1771
1092 * describes; we simply make sure there is only ever one concurrent
1093 * tcp connection per peer.
1094 */
1095 if (peer->fd != -1)
1096 return (-1);
1097
1098 if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1099 SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1100 log_peer_warn(&peer->conf, "session_connect socket");
1101 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1102 return (-1);
1103 }
1104
1105 if (peer->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) {
1106 log_peer_warnx(&peer->conf,
1107 "ipsec or md5sig configured but not available");
1108 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1109 return (-1);
1110 }
1111
1112 if (tcp_md5_set(peer->fd, &peer->auth_conf,
1113 &peer->conf.remote_addr) == -1)
1114 log_peer_warn(&peer->conf, "setting md5sig");
1115
1116 /* if local-address is set we need to bind() */
1117 bind_addr = session_localaddr(peer);
1118 if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1119 if (bind(peer->fd, sa, sa_len) == -1) {
1120 log_peer_warn(&peer->conf, "session_connect bind");
1121 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1122 return (-1);
1123 }
1124 }
1125
1126 if (session_setup_socket(peer)) {
1127 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1128 return (-1);
1129 }
1130
1131 sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1132 if (connect(peer->fd, sa, sa_len) == -1) {
1133 if (errno != EINPROGRESS) {
1134 if (errno != peer->lasterr)
1135 log_peer_warn(&peer->conf, "connect");
1136 peer->lasterr = errno;
1137 bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1138 return (-1);
1139 }
1140 } else
1141 bgp_fsm(peer, EVNT_CON_OPEN, NULL);
1142
1143 return (0);
1144 }
1145
1146 int
session_setup_socket(struct peer * p)1147 session_setup_socket(struct peer *p)
1148 {
1149 int ttl = p->conf.distance;
1150 int pre = IPTOS_PREC_INTERNETCONTROL;
1151 int nodelay = 1;
1152 int bsize;
1153
1154 switch (p->conf.remote_addr.aid) {
1155 case AID_INET:
1156 /* set precedence, see RFC 1771 appendix 5 */
1157 if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1158 -1) {
1159 log_peer_warn(&p->conf,
1160 "session_setup_socket setsockopt TOS");
1161 return (-1);
1162 }
1163
1164 if (p->conf.ebgp) {
1165 /*
1166 * set TTL to foreign router's distance
1167 * 1=direct n=multihop with ttlsec, we always use 255
1168 */
1169 if (p->conf.ttlsec) {
1170 ttl = 256 - p->conf.distance;
1171 if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1172 &ttl, sizeof(ttl)) == -1) {
1173 log_peer_warn(&p->conf,
1174 "session_setup_socket: "
1175 "setsockopt MINTTL");
1176 return (-1);
1177 }
1178 ttl = 255;
1179 }
1180
1181 if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1182 sizeof(ttl)) == -1) {
1183 log_peer_warn(&p->conf,
1184 "session_setup_socket setsockopt TTL");
1185 return (-1);
1186 }
1187 }
1188 break;
1189 case AID_INET6:
1190 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre,
1191 sizeof(pre)) == -1) {
1192 log_peer_warn(&p->conf, "session_setup_socket "
1193 "setsockopt TCLASS");
1194 return (-1);
1195 }
1196
1197 if (p->conf.ebgp) {
1198 /*
1199 * set hoplimit to foreign router's distance
1200 * 1=direct n=multihop with ttlsec, we always use 255
1201 */
1202 if (p->conf.ttlsec) {
1203 ttl = 256 - p->conf.distance;
1204 if (setsockopt(p->fd, IPPROTO_IPV6,
1205 IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1206 == -1) {
1207 log_peer_warn(&p->conf,
1208 "session_setup_socket: "
1209 "setsockopt MINHOPCOUNT");
1210 return (-1);
1211 }
1212 ttl = 255;
1213 }
1214 if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1215 &ttl, sizeof(ttl)) == -1) {
1216 log_peer_warn(&p->conf,
1217 "session_setup_socket setsockopt hoplimit");
1218 return (-1);
1219 }
1220 }
1221 break;
1222 }
1223
1224 /* set TCP_NODELAY */
1225 if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1226 sizeof(nodelay)) == -1) {
1227 log_peer_warn(&p->conf,
1228 "session_setup_socket setsockopt TCP_NODELAY");
1229 return (-1);
1230 }
1231
1232 /* limit bufsize. no biggie if it fails */
1233 bsize = 65535;
1234 setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize));
1235 setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize));
1236
1237 return (0);
1238 }
1239
1240 /*
1241 * compare the bgpd_addr with the sockaddr by converting the latter into
1242 * a bgpd_addr. Return true if the two are equal, including any scope
1243 */
1244 static int
sa_equal(struct bgpd_addr * ba,struct sockaddr * b)1245 sa_equal(struct bgpd_addr *ba, struct sockaddr *b)
1246 {
1247 struct bgpd_addr bb;
1248
1249 sa2addr(b, &bb, NULL);
1250 return (memcmp(ba, &bb, sizeof(*ba)) == 0);
1251 }
1252
1253 static void
get_alternate_addr(struct bgpd_addr * local,struct bgpd_addr * remote,struct bgpd_addr * alt,unsigned int * scope)1254 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote,
1255 struct bgpd_addr *alt, unsigned int *scope)
1256 {
1257 struct ifaddrs *ifap, *ifa, *match;
1258 int connected = 0;
1259 u_int8_t plen;
1260
1261 if (getifaddrs(&ifap) == -1)
1262 fatal("getifaddrs");
1263
1264 for (match = ifap; match != NULL; match = match->ifa_next) {
1265 if (match->ifa_addr == NULL)
1266 continue;
1267 if (match->ifa_addr->sa_family != AF_INET &&
1268 match->ifa_addr->sa_family != AF_INET6)
1269 continue;
1270 if (sa_equal(local, match->ifa_addr)) {
1271 if (remote->aid == AID_INET6 &&
1272 IN6_IS_ADDR_LINKLOCAL(&remote->v6)) {
1273 /* IPv6 LLA are by definition connected */
1274 connected = 1;
1275 } else if (match->ifa_flags & IFF_POINTOPOINT &&
1276 match->ifa_dstaddr != NULL) {
1277 if (sa_equal(remote, match->ifa_dstaddr))
1278 connected = 1;
1279 } else if (match->ifa_netmask != NULL) {
1280 plen = mask2prefixlen(
1281 match->ifa_addr->sa_family,
1282 match->ifa_netmask);
1283 if (prefix_compare(local, remote, plen) == 0)
1284 connected = 1;
1285 }
1286 break;
1287 }
1288 }
1289
1290 if (match == NULL) {
1291 log_warnx("%s: local address not found", __func__);
1292 return;
1293 }
1294 if (connected)
1295 *scope = if_nametoindex(match->ifa_name);
1296 else
1297 *scope = 0;
1298
1299 switch (local->aid) {
1300 case AID_INET6:
1301 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1302 if (ifa->ifa_addr != NULL &&
1303 ifa->ifa_addr->sa_family == AF_INET &&
1304 strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1305 sa2addr(ifa->ifa_addr, alt, NULL);
1306 break;
1307 }
1308 }
1309 break;
1310 case AID_INET:
1311 for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1312 if (ifa->ifa_addr != NULL &&
1313 ifa->ifa_addr->sa_family == AF_INET6 &&
1314 strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1315 struct sockaddr_in6 *s =
1316 (struct sockaddr_in6 *)ifa->ifa_addr;
1317
1318 /* only accept global scope addresses */
1319 if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
1320 IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
1321 continue;
1322 sa2addr(ifa->ifa_addr, alt, NULL);
1323 break;
1324 }
1325 }
1326 break;
1327 default:
1328 log_warnx("%s: unsupported address family %s", __func__,
1329 aid2str(local->aid));
1330 break;
1331 }
1332
1333 freeifaddrs(ifap);
1334 }
1335
1336 void
session_tcp_established(struct peer * peer)1337 session_tcp_established(struct peer *peer)
1338 {
1339 struct sockaddr_storage ss;
1340 socklen_t len;
1341
1342 len = sizeof(ss);
1343 if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1344 log_warn("getsockname");
1345 sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1346 len = sizeof(ss);
1347 if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1348 log_warn("getpeername");
1349 sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1350
1351 get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt,
1352 &peer->if_scope);
1353 }
1354
/*
 * Append a capability header (code followed by length, one byte each)
 * to opb.  Returns the sum of the results of both ibuf_add_n8() calls.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int	rv;

	rv = ibuf_add_n8(opb, capa_code);
	rv += ibuf_add_n8(opb, capa_len);

	return (rv);
}
1364
/*
 * Append the 4 byte payload of a multiprotocol capability for `aid':
 * 16-bit AFI, one zero byte and 8-bit SAFI.
 * Returns -1 if the AID cannot be translated, else the sum of the
 * results of the ibuf calls.
 */
static int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	afi;
	uint8_t		safi;
	int		rv;

	if (aid2afi(aid, &afi, &safi) == -1) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	rv = ibuf_add_n16(buf, afi);
	rv += ibuf_add_zero(buf, 1);
	rv += ibuf_add_n8(buf, safi);

	return (rv);
}
1383
/*
 * Append a 4 byte AFI / SAFI / flags tuple for `aid' to b:
 * 16-bit AFI, 8-bit SAFI and 8-bit flags.
 * Returns -1 if the AID cannot be translated, else the sum of the
 * results of the ibuf calls.
 */
static int
session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
{
	uint16_t	afi;
	uint8_t		safi;
	int		rv;

	if (aid2afi(aid, &afi, &safi) != 0) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	rv = ibuf_add_n16(b, afi);
	rv += ibuf_add_n8(b, safi);
	rv += ibuf_add_n8(b, flags);

	return (rv);
}
1402
1403 static int
session_capa_add_ext_nh(struct ibuf * b,uint8_t aid)1404 session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
1405 {
1406 int errs = 0;
1407 uint16_t afi;
1408 uint8_t safi;
1409
1410 if (aid2afi(aid, &afi, &safi)) {
1411 log_warn("%s: bad AID", __func__);
1412 return (-1);
1413 }
1414
1415 errs += ibuf_add_n16(b, afi);
1416 errs += ibuf_add_n16(b, safi);
1417 errs += ibuf_add_n16(b, AFI_IPv6);
1418
1419 return (errs);
1420 }
1421
1422 struct ibuf *
session_newmsg(enum msg_type msgtype,uint16_t len)1423 session_newmsg(enum msg_type msgtype, uint16_t len)
1424 {
1425 struct ibuf *buf;
1426 int errs = 0;
1427
1428 if ((buf = ibuf_open(len)) == NULL)
1429 return (NULL);
1430
1431 errs += ibuf_add(buf, marker, sizeof(marker));
1432 errs += ibuf_add_n16(buf, len);
1433 errs += ibuf_add_n8(buf, msgtype);
1434
1435 if (errs) {
1436 ibuf_free(buf);
1437 return (NULL);
1438 }
1439
1440 return (buf);
1441 }
1442
1443 void
session_sendmsg(struct ibuf * msg,struct peer * p,enum msg_type msgtype)1444 session_sendmsg(struct ibuf *msg, struct peer *p, enum msg_type msgtype)
1445 {
1446 struct mrt *mrt;
1447
1448 LIST_FOREACH(mrt, &mrthead, entry) {
1449 if (!(mrt->type == MRT_ALL_OUT || (msgtype == BGP_UPDATE &&
1450 mrt->type == MRT_UPDATE_OUT)))
1451 continue;
1452 if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1453 mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1454 mrt->group_id == p->conf.groupid))
1455 mrt_dump_bgp_msg(mrt, msg, p, msgtype);
1456 }
1457
1458 ibuf_close(p->wbuf, msg);
1459 if (!p->throttled && msgbuf_queuelen(p->wbuf) > SESS_MSG_HIGH_MARK) {
1460 if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1461 log_peer_warn(&p->conf, "imsg_compose XOFF");
1462 else
1463 p->throttled = 1;
1464 }
1465 }
1466
1467 /*
1468 * Translate between internal roles and the value expected by RFC 9234.
1469 */
1470 static uint8_t
role2capa(enum role role)1471 role2capa(enum role role)
1472 {
1473 switch (role) {
1474 case ROLE_CUSTOMER:
1475 return CAPA_ROLE_CUSTOMER;
1476 case ROLE_PROVIDER:
1477 return CAPA_ROLE_PROVIDER;
1478 case ROLE_RS:
1479 return CAPA_ROLE_RS;
1480 case ROLE_RS_CLIENT:
1481 return CAPA_ROLE_RS_CLIENT;
1482 case ROLE_PEER:
1483 return CAPA_ROLE_PEER;
1484 default:
1485 fatalx("Unsupported role for role capability");
1486 }
1487 }
1488
1489 static enum role
capa2role(uint8_t val)1490 capa2role(uint8_t val)
1491 {
1492 switch (val) {
1493 case CAPA_ROLE_PROVIDER:
1494 return ROLE_PROVIDER;
1495 case CAPA_ROLE_RS:
1496 return ROLE_RS;
1497 case CAPA_ROLE_RS_CLIENT:
1498 return ROLE_RS_CLIENT;
1499 case CAPA_ROLE_CUSTOMER:
1500 return ROLE_CUSTOMER;
1501 case CAPA_ROLE_PEER:
1502 return ROLE_PEER;
1503 default:
1504 return ROLE_NONE;
1505 }
1506 }
1507
/*
 * Build and queue the OPEN message for peer p.
 *
 * All announced capabilities (p->capa.ann) are first collected in the
 * temporary buffer opb.  The OPEN message is then assembled with the
 * capabilities wrapped in a single OPT_PARAM_CAPABILITIES optional
 * parameter, using the RFC 9072 extended length encoding when the
 * regular one byte length does not suffice.
 *
 * Any buffer error raises EVNT_CON_FATAL and nothing is sent.  On
 * success the message is passed to session_sendmsg() and the sent
 * OPEN counter is incremented.
 */
void
session_open(struct peer *p)
{
	struct ibuf		*buf, *opb;
	size_t			 len, optparamlen;
	uint16_t		 holdtime;
	uint8_t			 i;
	int			 errs = 0, extlen = 0;
	int			 mpcapa = 0;


	/* scratch buffer collecting the encoded capabilities */
	if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) ==
	    NULL) {
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/* multiprotocol extensions, RFC 4760 */
	for (i = AID_MIN; i < AID_MAX; i++)
		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
			errs += session_capa_add(opb, CAPA_MP, 4);
			errs += session_capa_add_mp(opb, i);
			/* mpcapa counts the announced address families */
			mpcapa++;
		}

	/* route refresh, RFC 2918 */
	if (p->capa.ann.refresh)	/* no data */
		errs += session_capa_add(opb, CAPA_REFRESH, 0);

	/* extended nexthop encoding, RFC 8950 */
	if (p->capa.ann.ext_nh[AID_INET]) {
		uint8_t enhlen = 0;

		/* 6 bytes per tuple, see session_capa_add_ext_nh() */
		if (p->capa.ann.mp[AID_INET])
			enhlen += 6;
		if (p->capa.ann.mp[AID_VPN_IPv4])
			enhlen += 6;
		errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
		if (p->capa.ann.mp[AID_INET])
			errs += session_capa_add_ext_nh(opb, AID_INET);
		if (p->capa.ann.mp[AID_VPN_IPv4])
			errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
	}

	/* extended message support, RFC 8654 */
	if (p->capa.ann.ext_msg)	/* no data */
		errs += session_capa_add(opb, CAPA_EXT_MSG, 0);

	/* BGP open policy, RFC 9234, only for ebgp sessions */
	if (p->conf.ebgp && p->capa.ann.policy &&
	    p->conf.role != ROLE_NONE &&
	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
	    mpcapa == 0)) {
		errs += session_capa_add(opb, CAPA_ROLE, 1);
		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
	}

	/* graceful restart and End-of-RIB marker, RFC 4724 */
	if (p->capa.ann.grestart.restart) {
		int		rst = 0;
		uint16_t	hdr = 0;

		/* count the address families that are still restarting */
		for (i = AID_MIN; i < AID_MAX; i++) {
			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
				rst++;
		}

		/* Only set the R-flag if no graceful restart is ongoing */
		if (!rst)
			hdr |= CAPA_GR_R_FLAG;
		if (p->capa.ann.grestart.grnotification)
			hdr |= CAPA_GR_N_FLAG;
		/* only the 2 byte header is sent, no per-AFI entries */
		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
		errs += ibuf_add_n16(opb, hdr);
	}

	/* 4-bytes AS numbers, RFC6793 */
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
		errs += ibuf_add_n32(opb, p->conf.local_as);
	}

	/* advertisement of multiple paths, RFC7911 */
	if (p->capa.ann.add_path[AID_MIN]) {	/* variable */
		uint8_t	aplen;

		/* one 4 byte tuple per announced address family */
		if (mpcapa)
			aplen = 4 * mpcapa;
		else	/* AID_INET */
			aplen = 4;
		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
		if (mpcapa) {
			for (i = AID_MIN; i < AID_MAX; i++) {
				if (p->capa.ann.mp[i]) {
					errs += session_capa_add_afi(opb,
					    i, p->capa.ann.add_path[i] &
					    CAPA_AP_MASK);
				}
			}
		} else {	/* AID_INET */
			errs += session_capa_add_afi(opb, AID_INET,
			    p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK);
		}
	}

	/* enhanced route-refresh, RFC7313 */
	if (p->capa.ann.enhanced_rr)	/* no data */
		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);

	/* any failed append above makes the capability buffer unusable */
	if (errs) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/*
	 * Work out the value of the optional parameter length field
	 * (optparamlen) and the total message length (len).
	 */
	optparamlen = ibuf_size(opb);
	len = MSGSIZE_OPEN_MIN + optparamlen;
	if (optparamlen == 0) {
		/* nothing */
	} else if (optparamlen + 2 >= 255) {
		/* RFC9072: use 255 as magic size and request extra header */
		optparamlen = 255;
		extlen = 1;
		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
		len += 2 * 3;
	} else {
		/* regular capabilities header */
		optparamlen += 2;
		len += 2;
	}

	if ((buf = session_newmsg(BGP_OPEN, len)) == NULL) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/* the per-peer holdtime overrides the global one when set */
	if (p->conf.holdtime)
		holdtime = p->conf.holdtime;
	else
		holdtime = conf->holdtime;

	/* fixed part of the OPEN message, starting with the version */
	errs += ibuf_add_n8(buf, 4);
	errs += ibuf_add_n16(buf, p->conf.local_short_as);
	errs += ibuf_add_n16(buf, holdtime);
	/* is already in network byte order */
	/*
	 * NOTE(review): the comment above does not fit the _n32 helper
	 * used below; confirm in which byte order conf->bgpid is stored.
	 */
	errs += ibuf_add_n32(buf, conf->bgpid);
	errs += ibuf_add_n8(buf, optparamlen);

	if (extlen) {
		/* RFC9072 extra header which spans over the capabilities hdr */
		errs += ibuf_add_n8(buf, OPT_PARAM_EXT_LEN);
		errs += ibuf_add_n16(buf, ibuf_size(opb) + 1 + 2);
	}

	if (optparamlen) {
		errs += ibuf_add_n8(buf, OPT_PARAM_CAPABILITIES);

		if (extlen) {
			/* RFC9072: 2-byte extended length */
			errs += ibuf_add_n16(buf, ibuf_size(opb));
		} else {
			errs += ibuf_add_n8(buf, ibuf_size(opb));
		}
		errs += ibuf_add_ibuf(buf, opb);
	}

	/* the capabilities were copied into buf, opb is done */
	ibuf_free(opb);

	if (errs) {
		ibuf_free(buf);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	session_sendmsg(buf, p, BGP_OPEN);
	p->stats.msg_sent_open++;
}
1686
1687 void
session_keepalive(struct peer * p)1688 session_keepalive(struct peer *p)
1689 {
1690 struct ibuf *buf;
1691
1692 if ((buf = session_newmsg(BGP_KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL) {
1693 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1694 return;
1695 }
1696
1697 session_sendmsg(buf, p, BGP_KEEPALIVE);
1698 start_timer_keepalive(p);
1699 p->stats.msg_sent_keepalive++;
1700 }
1701
1702 void
session_update(uint32_t peerid,struct ibuf * ibuf)1703 session_update(uint32_t peerid, struct ibuf *ibuf)
1704 {
1705 struct peer *p;
1706 struct ibuf *buf;
1707 size_t len, maxsize = MAX_PKTSIZE;
1708
1709 if ((p = getpeerbyid(conf, peerid)) == NULL) {
1710 log_warnx("%s: no such peer: id=%u", __func__, peerid);
1711 return;
1712 }
1713
1714 if (p->state != STATE_ESTABLISHED)
1715 return;
1716
1717 if (p->capa.neg.ext_msg)
1718 maxsize = MAX_EXT_PKTSIZE;
1719 len = ibuf_size(ibuf);
1720 if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER ||
1721 len > maxsize - MSGSIZE_HEADER) {
1722 log_peer_warnx(&p->conf, "bad UDPATE from RDE");
1723 return;
1724 }
1725
1726 if ((buf = session_newmsg(BGP_UPDATE, MSGSIZE_HEADER + len)) == NULL) {
1727 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1728 return;
1729 }
1730
1731 if (ibuf_add_ibuf(buf, ibuf)) {
1732 ibuf_free(buf);
1733 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1734 return;
1735 }
1736
1737 session_sendmsg(buf, p, BGP_UPDATE);
1738 start_timer_keepalive(p);
1739 p->stats.msg_sent_update++;
1740 }
1741
1742 /* Return 1 if a hard reset should be issued, 0 for a graceful notification */
1743 static int
session_req_hard_reset(enum err_codes errcode,uint8_t subcode)1744 session_req_hard_reset(enum err_codes errcode, uint8_t subcode)
1745 {
1746 switch (errcode) {
1747 case ERR_HEADER:
1748 case ERR_OPEN:
1749 case ERR_UPDATE:
1750 case ERR_FSM:
1751 case ERR_RREFRESH:
1752 /*
1753 * Protocol errors trigger a hard reset. The peer
1754 * is not trustworthy and so there is no realistic
1755 * hope that forwarding can continue.
1756 */
1757 break;
1758 case ERR_HOLDTIMEREXPIRED:
1759 case ERR_SENDHOLDTIMEREXPIRED:
1760 /* Keep forwarding and hope the other side is back soon. */
1761 return 0;
1762 case ERR_CEASE:
1763 switch (subcode) {
1764 case ERR_CEASE_CONN_REJECT:
1765 case ERR_CEASE_OTHER_CHANGE:
1766 case ERR_CEASE_COLLISION:
1767 case ERR_CEASE_RSRC_EXHAUST:
1768 /* Per RFC8538 suggestion make these graceful. */
1769 return 0;
1770 }
1771 break;
1772 }
1773 return 1;
1774 }
1775
1776 void
session_notification_data(struct peer * p,uint8_t errcode,uint8_t subcode,void * data,size_t datalen)1777 session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode,
1778 void *data, size_t datalen)
1779 {
1780 struct ibuf ibuf;
1781
1782 ibuf_from_buffer(&ibuf, data, datalen);
1783 session_notification(p, errcode, subcode, &ibuf);
1784 }
1785
1786 void
session_notification(struct peer * p,uint8_t errcode,uint8_t subcode,struct ibuf * ibuf)1787 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1788 struct ibuf *ibuf)
1789 {
1790 struct ibuf *buf;
1791 const char *reason = "sending";
1792 int errs = 0, need_hard_reset = 0;
1793 size_t datalen = 0;
1794
1795 switch (p->state) {
1796 case STATE_OPENSENT:
1797 case STATE_OPENCONFIRM:
1798 case STATE_ESTABLISHED:
1799 break;
1800 default:
1801 /* session not open, no need to send notification */
1802 log_notification(p, errcode, subcode, ibuf, "dropping");
1803 return;
1804 }
1805
1806 if (p->capa.neg.grestart.grnotification) {
1807 if (session_req_hard_reset(errcode, subcode)) {
1808 need_hard_reset = 1;
1809 datalen += 2;
1810 reason = "sending hard-reset";
1811 } else {
1812 reason = "sending graceful";
1813 }
1814 }
1815
1816 log_notification(p, errcode, subcode, ibuf, reason);
1817
1818 /* cap to maximum size */
1819 if (ibuf != NULL) {
1820 if (ibuf_size(ibuf) >
1821 MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN - datalen) {
1822 log_peer_warnx(&p->conf,
1823 "oversized notification, data trunkated");
1824 ibuf_truncate(ibuf, MAX_PKTSIZE -
1825 MSGSIZE_NOTIFICATION_MIN - datalen);
1826 }
1827 datalen += ibuf_size(ibuf);
1828 }
1829
1830 if ((buf = session_newmsg(BGP_NOTIFICATION,
1831 MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1832 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1833 return;
1834 }
1835
1836 if (need_hard_reset) {
1837 errs += ibuf_add_n8(buf, ERR_CEASE);
1838 errs += ibuf_add_n8(buf, ERR_CEASE_HARD_RESET);
1839 }
1840
1841 errs += ibuf_add_n8(buf, errcode);
1842 errs += ibuf_add_n8(buf, subcode);
1843
1844 if (ibuf != NULL)
1845 errs += ibuf_add_ibuf(buf, ibuf);
1846
1847 if (errs) {
1848 ibuf_free(buf);
1849 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1850 return;
1851 }
1852
1853 session_sendmsg(buf, p, BGP_NOTIFICATION);
1854 p->stats.msg_sent_notification++;
1855 p->stats.last_sent_errcode = errcode;
1856 p->stats.last_sent_suberr = subcode;
1857 }
1858
1859 int
session_neighbor_rrefresh(struct peer * p)1860 session_neighbor_rrefresh(struct peer *p)
1861 {
1862 uint8_t i;
1863
1864 if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1865 return (-1);
1866
1867 for (i = AID_MIN; i < AID_MAX; i++) {
1868 if (p->capa.neg.mp[i] != 0)
1869 session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1870 }
1871
1872 return (0);
1873 }
1874
1875 void
session_rrefresh(struct peer * p,uint8_t aid,uint8_t subtype)1876 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1877 {
1878 struct ibuf *buf;
1879 int errs = 0;
1880 uint16_t afi;
1881 uint8_t safi;
1882
1883 switch (subtype) {
1884 case ROUTE_REFRESH_REQUEST:
1885 p->stats.refresh_sent_req++;
1886 break;
1887 case ROUTE_REFRESH_BEGIN_RR:
1888 case ROUTE_REFRESH_END_RR:
1889 /* requires enhanced route refresh */
1890 if (!p->capa.neg.enhanced_rr)
1891 return;
1892 if (subtype == ROUTE_REFRESH_BEGIN_RR)
1893 p->stats.refresh_sent_borr++;
1894 else
1895 p->stats.refresh_sent_eorr++;
1896 break;
1897 default:
1898 fatalx("session_rrefresh: bad subtype %d", subtype);
1899 }
1900
1901 if (aid2afi(aid, &afi, &safi) == -1)
1902 fatalx("session_rrefresh: bad afi/safi pair");
1903
1904 if ((buf = session_newmsg(BGP_RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1905 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1906 return;
1907 }
1908
1909 errs += ibuf_add_n16(buf, afi);
1910 errs += ibuf_add_n8(buf, subtype);
1911 errs += ibuf_add_n8(buf, safi);
1912
1913 if (errs) {
1914 ibuf_free(buf);
1915 bgp_fsm(p, EVNT_CON_FATAL, NULL);
1916 return;
1917 }
1918
1919 session_sendmsg(buf, p, BGP_RREFRESH);
1920 p->stats.msg_sent_rrefresh++;
1921 }
1922
1923 int
session_graceful_restart(struct peer * p)1924 session_graceful_restart(struct peer *p)
1925 {
1926 uint8_t i;
1927 uint16_t staletime = conf->staletime;
1928
1929 if (p->conf.staletime)
1930 staletime = p->conf.staletime;
1931
1932 /* RFC 8538: enforce configurable upper bound of the stale timer */
1933 if (staletime > p->capa.neg.grestart.timeout)
1934 staletime = p->capa.neg.grestart.timeout;
1935 timer_set(&p->timers, Timer_RestartTimeout, staletime);
1936
1937 for (i = AID_MIN; i < AID_MAX; i++) {
1938 if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1939 if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1940 &i, sizeof(i)) == -1)
1941 return (-1);
1942 log_peer_warnx(&p->conf,
1943 "graceful restart of %s, keeping routes",
1944 aid2str(i));
1945 p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1946 } else if (p->capa.neg.mp[i]) {
1947 if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1948 &i, sizeof(i)) == -1)
1949 return (-1);
1950 log_peer_warnx(&p->conf,
1951 "graceful restart of %s, flushing routes",
1952 aid2str(i));
1953 }
1954 }
1955 return (0);
1956 }
1957
1958 int
session_graceful_stop(struct peer * p)1959 session_graceful_stop(struct peer *p)
1960 {
1961 uint8_t i;
1962
1963 for (i = AID_MIN; i < AID_MAX; i++) {
1964 /*
1965 * Only flush if the peer is restarting and the timeout fired.
1966 * In all other cases the session was already flushed when the
1967 * session went down or when the new open message was parsed.
1968 */
1969 if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1970 log_peer_warnx(&p->conf, "graceful restart of %s, "
1971 "time-out, flushing", aid2str(i));
1972 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1973 &i, sizeof(i)) == -1)
1974 return (-1);
1975 }
1976 p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1977 }
1978 return (0);
1979 }
1980
1981 int
session_dispatch_msg(struct pollfd * pfd,struct peer * p)1982 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1983 {
1984 socklen_t len;
1985 int error;
1986
1987 if (p->state == STATE_CONNECT) {
1988 if (pfd->revents & POLLOUT) {
1989 if (pfd->revents & POLLIN) {
1990 /* error occurred */
1991 len = sizeof(error);
1992 if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1993 &error, &len) == -1 || error) {
1994 if (error)
1995 errno = error;
1996 if (errno != p->lasterr) {
1997 log_peer_warn(&p->conf,
1998 "socket error");
1999 p->lasterr = errno;
2000 }
2001 bgp_fsm(p, EVNT_CON_OPENFAIL, NULL);
2002 return (1);
2003 }
2004 }
2005 bgp_fsm(p, EVNT_CON_OPEN, NULL);
2006 return (1);
2007 }
2008 if (pfd->revents & POLLHUP) {
2009 bgp_fsm(p, EVNT_CON_OPENFAIL, NULL);
2010 return (1);
2011 }
2012 if (pfd->revents & (POLLERR|POLLNVAL)) {
2013 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2014 return (1);
2015 }
2016 return (0);
2017 }
2018
2019 if (pfd->revents & POLLHUP) {
2020 bgp_fsm(p, EVNT_CON_CLOSED, NULL);
2021 return (1);
2022 }
2023 if (pfd->revents & (POLLERR|POLLNVAL)) {
2024 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2025 return (1);
2026 }
2027
2028 if (pfd->revents & POLLOUT && msgbuf_queuelen(p->wbuf) > 0) {
2029 if (ibuf_write(p->fd, p->wbuf) == -1) {
2030 if (errno == EPIPE)
2031 log_peer_warnx(&p->conf, "Connection closed");
2032 else
2033 log_peer_warn(&p->conf, "write error");
2034 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2035 return (1);
2036 }
2037 p->stats.last_write = getmonotime();
2038 start_timer_sendholdtime(p);
2039 if (p->throttled &&
2040 msgbuf_queuelen(p->wbuf) < SESS_MSG_LOW_MARK) {
2041 if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
2042 log_peer_warn(&p->conf, "imsg_compose XON");
2043 else
2044 p->throttled = 0;
2045 }
2046 if (!(pfd->revents & POLLIN))
2047 return (1);
2048 }
2049
2050 if (p->fd != -1 && pfd->revents & POLLIN) {
2051 switch (ibuf_read(p->fd, p->wbuf)) {
2052 case -1:
2053 if (p->state == STATE_IDLE)
2054 /* error already handled before */
2055 return (1);
2056 log_peer_warn(&p->conf, "read error");
2057 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2058 return (1);
2059 case 0:
2060 bgp_fsm(p, EVNT_CON_CLOSED, NULL);
2061 return (1);
2062 }
2063 p->stats.last_read = getmonotime();
2064 return (1);
2065 }
2066 return (0);
2067 }
2068
2069 void
session_process_msg(struct peer * p)2070 session_process_msg(struct peer *p)
2071 {
2072 struct ibuf *msg;
2073 struct mrt *mrt;
2074 int processed = 0;
2075 uint8_t msgtype;
2076
2077 p->rpending = 0;
2078 if (p->wbuf == NULL)
2079 return;
2080
2081 /*
2082 * session might drop to IDLE -> all buffers are flushed
2083 */
2084 while ((msg = msgbuf_get(p->wbuf)) != NULL) {
2085 /* skip msg header and extract type */
2086 if (ibuf_skip(msg, MSGSIZE_HEADER_MARKER) == -1 ||
2087 ibuf_skip(msg, sizeof(uint16_t)) == -1 ||
2088 ibuf_get_n8(msg, &msgtype) == -1) {
2089 log_peer_warn(&p->conf, "process message failed");
2090 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2091 ibuf_free(msg);
2092 return;
2093 }
2094 ibuf_rewind(msg);
2095
2096 /* dump to MRT as soon as we have a full packet */
2097 LIST_FOREACH(mrt, &mrthead, entry) {
2098 if (!(mrt->type == MRT_ALL_IN ||
2099 (msgtype == BGP_UPDATE &&
2100 mrt->type == MRT_UPDATE_IN)))
2101 continue;
2102 if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
2103 mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
2104 mrt->group_id == p->conf.groupid))
2105 mrt_dump_bgp_msg(mrt, msg, p, msgtype);
2106 }
2107
2108 ibuf_skip(msg, MSGSIZE_HEADER);
2109
2110 switch (msgtype) {
2111 case BGP_OPEN:
2112 bgp_fsm(p, EVNT_RCVD_OPEN, msg);
2113 p->stats.msg_rcvd_open++;
2114 break;
2115 case BGP_UPDATE:
2116 bgp_fsm(p, EVNT_RCVD_UPDATE, msg);
2117 p->stats.msg_rcvd_update++;
2118 break;
2119 case BGP_NOTIFICATION:
2120 bgp_fsm(p, EVNT_RCVD_NOTIFICATION, msg);
2121 p->stats.msg_rcvd_notification++;
2122 break;
2123 case BGP_KEEPALIVE:
2124 bgp_fsm(p, EVNT_RCVD_KEEPALIVE, msg);
2125 p->stats.msg_rcvd_keepalive++;
2126 break;
2127 case BGP_RREFRESH:
2128 parse_rrefresh(p, msg);
2129 p->stats.msg_rcvd_rrefresh++;
2130 break;
2131 default: /* cannot happen */
2132 session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE,
2133 &msgtype, 1);
2134 log_peer_warnx(&p->conf,
2135 "received message with unknown type %u", msgtype);
2136 bgp_fsm(p, EVNT_CON_FATAL, NULL);
2137 }
2138 ibuf_free(msg);
2139 if (++processed > MSG_PROCESS_LIMIT) {
2140 p->rpending = 1;
2141 break;
2142 }
2143 }
2144 }
2145
2146 struct ibuf *
parse_header(struct ibuf * msg,void * arg,int * fd)2147 parse_header(struct ibuf *msg, void *arg, int *fd)
2148 {
2149 struct peer *peer = arg;
2150 struct ibuf *b;
2151 u_char m[MSGSIZE_HEADER_MARKER];
2152 uint16_t len, maxlen = MAX_PKTSIZE;
2153 uint8_t type;
2154
2155 if (ibuf_get(msg, m, sizeof(m)) == -1 ||
2156 ibuf_get_n16(msg, &len) == -1 ||
2157 ibuf_get_n8(msg, &type) == -1)
2158 return (NULL);
2159 /* caller MUST make sure we are getting 19 bytes! */
2160 if (memcmp(m, marker, sizeof(marker))) {
2161 log_peer_warnx(&peer->conf, "sync error");
2162 session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL);
2163 bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2164 errno = EINVAL;
2165 return (NULL);
2166 }
2167
2168 if (peer->capa.ann.ext_msg)
2169 maxlen = MAX_EXT_PKTSIZE;
2170
2171 if (len < MSGSIZE_HEADER || len > maxlen) {
2172 log_peer_warnx(&peer->conf,
2173 "received message: illegal length: %u byte", len);
2174 goto badlen;
2175 }
2176
2177 switch (type) {
2178 case BGP_OPEN:
2179 if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) {
2180 log_peer_warnx(&peer->conf,
2181 "received OPEN: illegal len: %u byte", len);
2182 goto badlen;
2183 }
2184 break;
2185 case BGP_NOTIFICATION:
2186 if (len < MSGSIZE_NOTIFICATION_MIN) {
2187 log_peer_warnx(&peer->conf,
2188 "received NOTIFICATION: illegal len: %u byte", len);
2189 goto badlen;
2190 }
2191 break;
2192 case BGP_UPDATE:
2193 if (len < MSGSIZE_UPDATE_MIN) {
2194 log_peer_warnx(&peer->conf,
2195 "received UPDATE: illegal len: %u byte", len);
2196 goto badlen;
2197 }
2198 break;
2199 case BGP_KEEPALIVE:
2200 if (len != MSGSIZE_KEEPALIVE) {
2201 log_peer_warnx(&peer->conf,
2202 "received KEEPALIVE: illegal len: %u byte", len);
2203 goto badlen;
2204 }
2205 break;
2206 case BGP_RREFRESH:
2207 if (len < MSGSIZE_RREFRESH_MIN) {
2208 log_peer_warnx(&peer->conf,
2209 "received RREFRESH: illegal len: %u byte", len);
2210 goto badlen;
2211 }
2212 break;
2213 default:
2214 log_peer_warnx(&peer->conf,
2215 "received msg with unknown type %u", type);
2216 session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE,
2217 &type, sizeof(type));
2218 bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2219 errno = EINVAL;
2220 return (NULL);
2221 }
2222
2223 if ((b = ibuf_open(len)) == NULL)
2224 return (NULL);
2225 return (b);
2226
2227 badlen:
2228 len = htons(len);
2229 session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2230 &len, sizeof(len));
2231 bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2232 errno = ERANGE;
2233 return (NULL);
2234 }
2235
2236 int
parse_open(struct peer * peer,struct ibuf * msg)2237 parse_open(struct peer *peer, struct ibuf *msg)
2238 {
2239 uint8_t version, rversion;
2240 uint16_t short_as;
2241 uint16_t holdtime, myholdtime;
2242 uint32_t as, bgpid;
2243 uint8_t optparamlen;
2244
2245 if (ibuf_get_n8(msg, &version) == -1 ||
2246 ibuf_get_n16(msg, &short_as) == -1 ||
2247 ibuf_get_n16(msg, &holdtime) == -1 ||
2248 ibuf_get_n32(msg, &bgpid) == -1 ||
2249 ibuf_get_n8(msg, &optparamlen) == -1)
2250 goto bad_len;
2251
2252 if (version != BGP_VERSION) {
2253 log_peer_warnx(&peer->conf,
2254 "peer wants unrecognized version %u", version);
2255 if (version > BGP_VERSION)
2256 rversion = version - BGP_VERSION;
2257 else
2258 rversion = BGP_VERSION;
2259 session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION,
2260 &rversion, sizeof(rversion));
2261 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2262 return (-1);
2263 }
2264
2265 as = peer->short_as = short_as;
2266 if (as == 0) {
2267 log_peer_warnx(&peer->conf,
2268 "peer requests unacceptable AS %u", as);
2269 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2270 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2271 return (-1);
2272 }
2273
2274 if (holdtime && holdtime < peer->conf.min_holdtime) {
2275 log_peer_warnx(&peer->conf,
2276 "peer requests unacceptable holdtime %u", holdtime);
2277 session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL);
2278 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2279 return (-1);
2280 }
2281
2282 myholdtime = peer->conf.holdtime;
2283 if (!myholdtime)
2284 myholdtime = conf->holdtime;
2285 if (holdtime < myholdtime)
2286 peer->holdtime = holdtime;
2287 else
2288 peer->holdtime = myholdtime;
2289
2290 /* check bgpid for validity - just disallow 0 */
2291 if (bgpid == 0) {
2292 log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable");
2293 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2294 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2295 return (-1);
2296 }
2297 peer->remote_bgpid = bgpid;
2298
2299 if (optparamlen != 0) {
2300 struct ibuf oparams, op;
2301 uint8_t ext_type, op_type;
2302 uint16_t ext_len, op_len;
2303
2304 ibuf_from_ibuf(&oparams, msg);
2305
2306 /* check for RFC9072 encoding */
2307 if (ibuf_get_n8(&oparams, &ext_type) == -1)
2308 goto bad_len;
2309 if (ext_type == OPT_PARAM_EXT_LEN) {
2310 if (ibuf_get_n16(&oparams, &ext_len) == -1)
2311 goto bad_len;
2312 /* skip RFC9072 header */
2313 if (ibuf_skip(msg, 3) == -1)
2314 goto bad_len;
2315 } else {
2316 ext_len = optparamlen;
2317 ibuf_rewind(&oparams);
2318 }
2319
2320 if (ibuf_truncate(&oparams, ext_len) == -1 ||
2321 ibuf_skip(msg, ext_len) == -1)
2322 goto bad_len;
2323
2324 while (ibuf_size(&oparams) > 0) {
2325 if (ibuf_get_n8(&oparams, &op_type) == -1)
2326 goto bad_len;
2327
2328 if (ext_type == OPT_PARAM_EXT_LEN) {
2329 if (ibuf_get_n16(&oparams, &op_len) == -1)
2330 goto bad_len;
2331 } else {
2332 uint8_t tmp;
2333 if (ibuf_get_n8(&oparams, &tmp) == -1)
2334 goto bad_len;
2335 op_len = tmp;
2336 }
2337
2338 if (ibuf_get_ibuf(&oparams, op_len, &op) == -1)
2339 goto bad_len;
2340
2341 switch (op_type) {
2342 case OPT_PARAM_CAPABILITIES: /* RFC 3392 */
2343 if (parse_capabilities(peer, &op, &as) == -1) {
2344 session_notification(peer, ERR_OPEN, 0,
2345 NULL);
2346 change_state(peer, STATE_IDLE,
2347 EVNT_RCVD_OPEN);
2348 return (-1);
2349 }
2350 break;
2351 case OPT_PARAM_AUTH: /* deprecated */
2352 default:
2353 /*
2354 * unsupported type
2355 * the RFCs tell us to leave the data section
2356 * empty and notify the peer with ERR_OPEN,
2357 * ERR_OPEN_OPT. How the peer should know
2358 * _which_ optional parameter we don't support
2359 * is beyond me.
2360 */
2361 log_peer_warnx(&peer->conf,
2362 "received OPEN message with unsupported "
2363 "optional parameter: type %u", op_type);
2364 session_notification(peer, ERR_OPEN,
2365 ERR_OPEN_OPT, NULL);
2366 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2367 return (-1);
2368 }
2369 }
2370 }
2371
2372 if (ibuf_size(msg) != 0) {
2373 bad_len:
2374 log_peer_warnx(&peer->conf,
2375 "corrupt OPEN message received: length mismatch");
2376 session_notification(peer, ERR_OPEN, 0, NULL);
2377 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2378 return (-1);
2379 }
2380
2381 /*
2382 * if remote-as is zero and it's a cloned neighbor, accept any
2383 * but only on the first connect, after that the remote-as needs
2384 * to remain the same.
2385 */
2386 if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2387 peer->conf.remote_as = as;
2388 peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2389 if (!peer->conf.ebgp)
2390 /* force enforce_as off for iBGP sessions */
2391 peer->conf.enforce_as = ENFORCE_AS_OFF;
2392 }
2393
2394 if (peer->conf.remote_as != as) {
2395 log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2396 log_as(as));
2397 session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2398 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2399 return (-1);
2400 }
2401
2402 /* on iBGP sessions check for bgpid collision */
2403 if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2404 struct in_addr ina;
2405 ina.s_addr = htonl(bgpid);
2406 log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours",
2407 inet_ntoa(ina));
2408 session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2409 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2410 return (-1);
2411 }
2412
2413 if (capa_neg_calc(peer) == -1) {
2414 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2415 return (-1);
2416 }
2417
2418 return (0);
2419 }
2420
2421 int
parse_update(struct peer * peer,struct ibuf * msg)2422 parse_update(struct peer *peer, struct ibuf *msg)
2423 {
2424 /*
2425 * we pass the message verbatim to the rde.
2426 * in case of errors the whole session is reset with a
2427 * notification anyway, we only need to know the peer
2428 */
2429 if (imsg_rde(IMSG_UPDATE, peer->conf.id, ibuf_data(msg),
2430 ibuf_size(msg)) == -1)
2431 return (-1);
2432
2433 return (0);
2434 }
2435
2436 int
parse_rrefresh(struct peer * peer,struct ibuf * msg)2437 parse_rrefresh(struct peer *peer, struct ibuf *msg)
2438 {
2439 struct route_refresh rr;
2440 uint16_t afi, datalen;
2441 uint8_t aid, safi, subtype;
2442
2443 datalen = ibuf_size(msg) + MSGSIZE_HEADER;
2444
2445 if (ibuf_get_n16(msg, &afi) == -1 ||
2446 ibuf_get_n8(msg, &subtype) == -1 ||
2447 ibuf_get_n8(msg, &safi) == -1) {
2448 /* minimum size checked in session_process_msg() */
2449 fatalx("%s: message too small", __func__);
2450 }
2451
2452 /* check subtype if peer announced enhanced route refresh */
2453 if (peer->capa.neg.enhanced_rr) {
2454 switch (subtype) {
2455 case ROUTE_REFRESH_REQUEST:
2456 /* no ORF support, so no oversized RREFRESH msgs */
2457 if (datalen != MSGSIZE_RREFRESH) {
2458 log_peer_warnx(&peer->conf,
2459 "received RREFRESH: illegal len: %u byte",
2460 datalen);
2461 datalen = htons(datalen);
2462 session_notification_data(peer, ERR_HEADER,
2463 ERR_HDR_LEN, &datalen, sizeof(datalen));
2464 bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2465 return (-1);
2466 }
2467 peer->stats.refresh_rcvd_req++;
2468 break;
2469 case ROUTE_REFRESH_BEGIN_RR:
2470 case ROUTE_REFRESH_END_RR:
2471 /* special handling for RFC7313 */
2472 if (datalen != MSGSIZE_RREFRESH) {
2473 log_peer_warnx(&peer->conf,
2474 "received RREFRESH: illegal len: %u byte",
2475 datalen);
2476 ibuf_rewind(msg);
2477 session_notification(peer, ERR_RREFRESH,
2478 ERR_RR_INV_LEN, msg);
2479 bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2480 return (-1);
2481 }
2482 if (subtype == ROUTE_REFRESH_BEGIN_RR)
2483 peer->stats.refresh_rcvd_borr++;
2484 else
2485 peer->stats.refresh_rcvd_eorr++;
2486 break;
2487 default:
2488 log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2489 "bad subtype %d", subtype);
2490 return (0);
2491 }
2492 } else {
2493 /* force subtype to default */
2494 subtype = ROUTE_REFRESH_REQUEST;
2495 peer->stats.refresh_rcvd_req++;
2496 }
2497
2498 /* afi/safi unchecked - unrecognized values will be ignored anyway */
2499 if (afi2aid(afi, safi, &aid) == -1) {
2500 log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2501 "invalid afi/safi pair");
2502 return (0);
2503 }
2504
2505 if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2506 log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2507 return (0);
2508 }
2509
2510 rr.aid = aid;
2511 rr.subtype = subtype;
2512
2513 if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2514 return (-1);
2515
2516 return (0);
2517 }
2518
2519 void
parse_notification(struct peer * peer,struct ibuf * msg)2520 parse_notification(struct peer *peer, struct ibuf *msg)
2521 {
2522 const char *reason = "received";
2523 uint8_t errcode, subcode;
2524 uint8_t reason_len;
2525 enum session_events event = EVNT_RCVD_NOTIFICATION;
2526
2527 if (ibuf_get_n8(msg, &errcode) == -1 ||
2528 ibuf_get_n8(msg, &subcode) == -1) {
2529 log_peer_warnx(&peer->conf, "received bad notification");
2530 goto done;
2531 }
2532
2533 /* RFC8538: check for hard-reset or graceful notification */
2534 if (peer->capa.neg.grestart.grnotification) {
2535 if (errcode == ERR_CEASE && subcode == ERR_CEASE_HARD_RESET) {
2536 if (ibuf_get_n8(msg, &errcode) == -1 ||
2537 ibuf_get_n8(msg, &subcode) == -1) {
2538 log_peer_warnx(&peer->conf,
2539 "received bad hard-reset notification");
2540 goto done;
2541 }
2542 reason = "received hard-reset";
2543 } else {
2544 reason = "received graceful";
2545 event = EVNT_RCVD_GRACE_NOTIFICATION;
2546 }
2547 }
2548
2549 peer->errcnt++;
2550 peer->stats.last_rcvd_errcode = errcode;
2551 peer->stats.last_rcvd_suberr = subcode;
2552
2553 log_notification(peer, errcode, subcode, msg, reason);
2554
2555 CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX);
2556 memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason));
2557 if (errcode == ERR_CEASE &&
2558 (subcode == ERR_CEASE_ADMIN_DOWN ||
2559 subcode == ERR_CEASE_ADMIN_RESET)) {
2560 /* check if shutdown reason is included */
2561 if (ibuf_get_n8(msg, &reason_len) != -1 && reason_len != 0) {
2562 if (ibuf_get(msg, peer->stats.last_reason,
2563 reason_len) == -1)
2564 log_peer_warnx(&peer->conf,
2565 "received truncated shutdown reason");
2566 }
2567 }
2568
2569 done:
2570 change_state(peer, STATE_IDLE, event);
2571 }
2572
2573 int
parse_capabilities(struct peer * peer,struct ibuf * buf,uint32_t * as)2574 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
2575 {
2576 struct ibuf capabuf;
2577 uint16_t afi, nhafi, gr_header;
2578 uint8_t capa_code, capa_len;
2579 uint8_t safi, aid, role, flags;
2580
2581 while (ibuf_size(buf) > 0) {
2582 if (ibuf_get_n8(buf, &capa_code) == -1 ||
2583 ibuf_get_n8(buf, &capa_len) == -1) {
2584 log_peer_warnx(&peer->conf, "Bad capabilities attr "
2585 "length: too short");
2586 return (-1);
2587 }
2588 if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) {
2589 log_peer_warnx(&peer->conf,
2590 "Received bad capabilities attr length: "
2591 "len %zu smaller than capa_len %u",
2592 ibuf_size(buf), capa_len);
2593 return (-1);
2594 }
2595
2596 switch (capa_code) {
2597 case CAPA_MP: /* RFC 4760 */
2598 if (capa_len != 4 ||
2599 ibuf_get_n16(&capabuf, &afi) == -1 ||
2600 ibuf_skip(&capabuf, 1) == -1 ||
2601 ibuf_get_n8(&capabuf, &safi) == -1) {
2602 log_peer_warnx(&peer->conf,
2603 "Received bad multi protocol capability");
2604 break;
2605 }
2606 if (afi2aid(afi, safi, &aid) == -1) {
2607 log_peer_warnx(&peer->conf,
2608 "Received multi protocol capability: "
2609 " unknown AFI %u, safi %u pair",
2610 afi, safi);
2611 peer->capa.peer.mp[AID_UNSPEC] = 1;
2612 break;
2613 }
2614 peer->capa.peer.mp[aid] = 1;
2615 break;
2616 case CAPA_REFRESH:
2617 peer->capa.peer.refresh = 1;
2618 break;
2619 case CAPA_EXT_NEXTHOP:
2620 while (ibuf_size(&capabuf) > 0) {
2621 uint16_t tmp16;
2622 if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2623 ibuf_get_n16(&capabuf, &tmp16) == -1 ||
2624 ibuf_get_n16(&capabuf, &nhafi) == -1) {
2625 log_peer_warnx(&peer->conf,
2626 "Received bad %s capability",
2627 log_capability(CAPA_EXT_NEXTHOP));
2628 memset(peer->capa.peer.ext_nh, 0,
2629 sizeof(peer->capa.peer.ext_nh));
2630 break;
2631 }
2632 safi = tmp16;
2633 if (afi2aid(afi, safi, &aid) == -1 ||
2634 !(aid == AID_INET || aid == AID_VPN_IPv4)) {
2635 log_peer_warnx(&peer->conf,
2636 "Received %s capability: "
2637 " unsupported AFI %u, safi %u pair",
2638 log_capability(CAPA_EXT_NEXTHOP),
2639 afi, safi);
2640 continue;
2641 }
2642 if (nhafi != AFI_IPv6) {
2643 log_peer_warnx(&peer->conf,
2644 "Received %s capability: "
2645 " unsupported nexthop AFI %u",
2646 log_capability(CAPA_EXT_NEXTHOP),
2647 nhafi);
2648 continue;
2649 }
2650 peer->capa.peer.ext_nh[aid] = 1;
2651 }
2652 break;
2653 case CAPA_EXT_MSG:
2654 peer->capa.peer.ext_msg = 1;
2655 break;
2656 case CAPA_ROLE:
2657 if (capa_len != 1 ||
2658 ibuf_get_n8(&capabuf, &role) == -1) {
2659 log_peer_warnx(&peer->conf,
2660 "Received bad role capability");
2661 break;
2662 }
2663 if (!peer->conf.ebgp) {
2664 log_peer_warnx(&peer->conf,
2665 "Received role capability on iBGP session");
2666 break;
2667 }
2668 peer->capa.peer.policy = 1;
2669 peer->remote_role = capa2role(role);
2670 break;
2671 case CAPA_RESTART:
2672 if (capa_len == 2) {
2673 /* peer only supports EoR marker */
2674 peer->capa.peer.grestart.restart = 1;
2675 peer->capa.peer.grestart.timeout = 0;
2676 break;
2677 } else if (capa_len % 4 != 2) {
2678 log_peer_warnx(&peer->conf,
2679 "Bad graceful restart capability");
2680 peer->capa.peer.grestart.restart = 0;
2681 peer->capa.peer.grestart.timeout = 0;
2682 break;
2683 }
2684
2685 if (ibuf_get_n16(&capabuf, &gr_header) == -1) {
2686 bad_gr_restart:
2687 log_peer_warnx(&peer->conf,
2688 "Bad graceful restart capability");
2689 peer->capa.peer.grestart.restart = 0;
2690 peer->capa.peer.grestart.timeout = 0;
2691 break;
2692 }
2693
2694 peer->capa.peer.grestart.timeout =
2695 gr_header & CAPA_GR_TIMEMASK;
2696 if (peer->capa.peer.grestart.timeout == 0) {
2697 log_peer_warnx(&peer->conf, "Received "
2698 "graceful restart with zero timeout");
2699 peer->capa.peer.grestart.restart = 0;
2700 break;
2701 }
2702
2703 while (ibuf_size(&capabuf) > 0) {
2704 if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2705 ibuf_get_n8(&capabuf, &safi) == -1 ||
2706 ibuf_get_n8(&capabuf, &flags) == -1)
2707 goto bad_gr_restart;
2708 if (afi2aid(afi, safi, &aid) == -1) {
2709 log_peer_warnx(&peer->conf,
2710 "Received graceful restart capa: "
2711 " unknown AFI %u, safi %u pair",
2712 afi, safi);
2713 continue;
2714 }
2715 peer->capa.peer.grestart.flags[aid] |=
2716 CAPA_GR_PRESENT;
2717 if (flags & CAPA_GR_F_FLAG)
2718 peer->capa.peer.grestart.flags[aid] |=
2719 CAPA_GR_FORWARD;
2720 if (gr_header & CAPA_GR_R_FLAG)
2721 peer->capa.peer.grestart.flags[aid] |=
2722 CAPA_GR_RESTART;
2723 peer->capa.peer.grestart.restart = 2;
2724 }
2725 if (gr_header & CAPA_GR_N_FLAG)
2726 peer->capa.peer.grestart.grnotification = 1;
2727 break;
2728 case CAPA_AS4BYTE:
2729 if (capa_len != 4 ||
2730 ibuf_get_n32(&capabuf, as) == -1) {
2731 log_peer_warnx(&peer->conf,
2732 "Received bad AS4BYTE capability");
2733 peer->capa.peer.as4byte = 0;
2734 break;
2735 }
2736 if (*as == 0) {
2737 log_peer_warnx(&peer->conf,
2738 "peer requests unacceptable AS %u", *as);
2739 session_notification(peer, ERR_OPEN,
2740 ERR_OPEN_AS, NULL);
2741 change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2742 return (-1);
2743 }
2744 peer->capa.peer.as4byte = 1;
2745 break;
2746 case CAPA_ADD_PATH:
2747 if (capa_len % 4 != 0) {
2748 bad_add_path:
2749 log_peer_warnx(&peer->conf,
2750 "Received bad ADD-PATH capability");
2751 memset(peer->capa.peer.add_path, 0,
2752 sizeof(peer->capa.peer.add_path));
2753 break;
2754 }
2755 while (ibuf_size(&capabuf) > 0) {
2756 if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2757 ibuf_get_n8(&capabuf, &safi) == -1 ||
2758 ibuf_get_n8(&capabuf, &flags) == -1)
2759 goto bad_add_path;
2760 if (afi2aid(afi, safi, &aid) == -1) {
2761 log_peer_warnx(&peer->conf,
2762 "Received ADD-PATH capa: "
2763 " unknown AFI %u, safi %u pair",
2764 afi, safi);
2765 memset(peer->capa.peer.add_path, 0,
2766 sizeof(peer->capa.peer.add_path));
2767 break;
2768 }
2769 if (flags & ~CAPA_AP_BIDIR) {
2770 log_peer_warnx(&peer->conf,
2771 "Received ADD-PATH capa: "
2772 " bad flags %x", flags);
2773 memset(peer->capa.peer.add_path, 0,
2774 sizeof(peer->capa.peer.add_path));
2775 break;
2776 }
2777 peer->capa.peer.add_path[aid] = flags;
2778 }
2779 break;
2780 case CAPA_ENHANCED_RR:
2781 peer->capa.peer.enhanced_rr = 1;
2782 break;
2783 default:
2784 break;
2785 }
2786 }
2787
2788 return (0);
2789 }
2790
2791 int
capa_neg_calc(struct peer * p)2792 capa_neg_calc(struct peer *p)
2793 {
2794 struct ibuf *ebuf;
2795 uint8_t i, hasmp = 0, capa_code, capa_len, capa_aid = 0;
2796
2797 /* a capability is accepted only if both sides announced it */
2798
2799 p->capa.neg.refresh =
2800 (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2801 p->capa.neg.enhanced_rr =
2802 (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2803 p->capa.neg.as4byte =
2804 (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2805 p->capa.neg.ext_msg =
2806 (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0;
2807
2808 /* MP: both side must agree on the AFI,SAFI pair */
2809 if (p->capa.peer.mp[AID_UNSPEC])
2810 hasmp = 1;
2811 for (i = AID_MIN; i < AID_MAX; i++) {
2812 if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2813 p->capa.neg.mp[i] = 1;
2814 else
2815 p->capa.neg.mp[i] = 0;
2816 if (p->capa.ann.mp[i] || p->capa.peer.mp[i])
2817 hasmp = 1;
2818 }
2819 /* if no MP capability present default to IPv4 unicast mode */
2820 if (!hasmp)
2821 p->capa.neg.mp[AID_INET] = 1;
2822
2823 /*
2824 * graceful restart: the peer capabilities are of interest here.
2825 * It is necessary to compare the new values with the previous ones
2826 * and act accordingly. AFI/SAFI that are not part in the MP capability
2827 * are treated as not being present.
2828 * Also make sure that a flush happens if the session stopped
2829 * supporting graceful restart.
2830 */
2831
2832 for (i = AID_MIN; i < AID_MAX; i++) {
2833 int8_t negflags;
2834
2835 /* disable GR if the AFI/SAFI is not present */
2836 if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2837 p->capa.neg.mp[i] == 0))
2838 p->capa.peer.grestart.flags[i] = 0; /* disable */
2839 /* look at current GR state and decide what to do */
2840 negflags = p->capa.neg.grestart.flags[i];
2841 p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2842 if (negflags & CAPA_GR_RESTARTING) {
2843 if (p->capa.ann.grestart.restart != 0 &&
2844 p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2845 p->capa.neg.grestart.flags[i] |=
2846 CAPA_GR_RESTARTING;
2847 } else {
2848 if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2849 &i, sizeof(i)) == -1) {
2850 log_peer_warnx(&p->conf,
2851 "imsg send failed");
2852 return (-1);
2853 }
2854 log_peer_warnx(&p->conf, "graceful restart of "
2855 "%s, not restarted, flushing", aid2str(i));
2856 }
2857 }
2858 }
2859 p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2860 p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2861 if (p->capa.ann.grestart.restart == 0)
2862 p->capa.neg.grestart.restart = 0;
2863
2864 /* RFC 8538 graceful notification: both sides need to agree */
2865 p->capa.neg.grestart.grnotification =
2866 (p->capa.ann.grestart.grnotification &&
2867 p->capa.peer.grestart.grnotification) != 0;
2868
2869 /* RFC 8950 extended nexthop encoding: both sides need to agree */
2870 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2871 for (i = AID_MIN; i < AID_MAX; i++) {
2872 if (p->capa.neg.mp[i] == 0)
2873 continue;
2874 if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
2875 p->capa.neg.ext_nh[i] = 1;
2876 }
2877 }
2878
2879 /*
2880 * ADD-PATH: set only those bits where both sides agree.
2881 * For this compare our send bit with the recv bit from the peer
2882 * and vice versa.
2883 * The flags are stored from this systems view point.
2884 * At index 0 the flags are set if any per-AID flag is set.
2885 */
2886 memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2887 for (i = AID_MIN; i < AID_MAX; i++) {
2888 if (p->capa.neg.mp[i] == 0)
2889 continue;
2890 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2891 (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2892 p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2893 p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2894 }
2895 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2896 (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2897 p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2898 p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2899 }
2900 }
2901
2902 /*
2903 * Open policy: check that the policy is sensible.
2904 *
2905 * Make sure that the roles match and set the negotiated capability
2906 * to the role of the peer. So the RDE can inject the OTC attribute.
2907 * See RFC 9234, section 4.2.
2908 * These checks should only happen on ebgp sessions.
2909 */
2910 if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 &&
2911 p->conf.ebgp) {
2912 switch (p->conf.role) {
2913 case ROLE_PROVIDER:
2914 if (p->remote_role != ROLE_CUSTOMER)
2915 goto policyfail;
2916 break;
2917 case ROLE_RS:
2918 if (p->remote_role != ROLE_RS_CLIENT)
2919 goto policyfail;
2920 break;
2921 case ROLE_RS_CLIENT:
2922 if (p->remote_role != ROLE_RS)
2923 goto policyfail;
2924 break;
2925 case ROLE_CUSTOMER:
2926 if (p->remote_role != ROLE_PROVIDER)
2927 goto policyfail;
2928 break;
2929 case ROLE_PEER:
2930 if (p->remote_role != ROLE_PEER)
2931 goto policyfail;
2932 break;
2933 default:
2934 policyfail:
2935 log_peer_warnx(&p->conf, "open policy role mismatch: "
2936 "our role %s, their role %s",
2937 log_policy(p->conf.role),
2938 log_policy(p->remote_role));
2939 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2940 return (-1);
2941 }
2942 p->capa.neg.policy = 1;
2943 }
2944
2945 /* enforce presence of open policy role capability */
2946 if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 &&
2947 p->conf.ebgp) {
2948 log_peer_warnx(&p->conf, "open policy role enforced but "
2949 "not present");
2950 session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2951 return (-1);
2952 }
2953
2954 /* enforce presence of other capabilities */
2955 if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) {
2956 capa_code = CAPA_REFRESH;
2957 capa_len = 0;
2958 goto fail;
2959 }
2960 /* enforce presence of other capabilities */
2961 if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) {
2962 capa_code = CAPA_EXT_MSG;
2963 capa_len = 0;
2964 goto fail;
2965 }
2966 if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) {
2967 capa_code = CAPA_ENHANCED_RR;
2968 capa_len = 0;
2969 goto fail;
2970 }
2971 if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) {
2972 capa_code = CAPA_AS4BYTE;
2973 capa_len = 4;
2974 goto fail;
2975 }
2976 if (p->capa.ann.grestart.restart == 2 &&
2977 p->capa.neg.grestart.restart == 0) {
2978 capa_code = CAPA_RESTART;
2979 capa_len = 2;
2980 goto fail;
2981 }
2982 for (i = AID_MIN; i < AID_MAX; i++) {
2983 if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) {
2984 capa_code = CAPA_MP;
2985 capa_len = 4;
2986 capa_aid = i;
2987 goto fail;
2988 }
2989 }
2990
2991 for (i = AID_MIN; i < AID_MAX; i++) {
2992 if (p->capa.neg.mp[i] == 0)
2993 continue;
2994 if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) &&
2995 (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) {
2996 capa_code = CAPA_ADD_PATH;
2997 capa_len = 4;
2998 capa_aid = i;
2999 goto fail;
3000 }
3001 if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) &&
3002 (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) {
3003 capa_code = CAPA_ADD_PATH;
3004 capa_len = 4;
3005 capa_aid = i;
3006 goto fail;
3007 }
3008 }
3009
3010 for (i = AID_MIN; i < AID_MAX; i++) {
3011 if (p->capa.neg.mp[i] == 0)
3012 continue;
3013 if (p->capa.ann.ext_nh[i] == 2 &&
3014 p->capa.neg.ext_nh[i] == 0) {
3015 capa_code = CAPA_EXT_NEXTHOP;
3016 capa_len = 6;
3017 capa_aid = i;
3018 goto fail;
3019 }
3020 }
3021 return (0);
3022
3023 fail:
3024 if ((ebuf = ibuf_dynamic(2, 256)) == NULL)
3025 return (-1);
3026 /* best effort, no problem if it fails */
3027 session_capa_add(ebuf, capa_code, capa_len);
3028 if (capa_code == CAPA_MP)
3029 session_capa_add_mp(ebuf, capa_aid);
3030 else if (capa_code == CAPA_ADD_PATH)
3031 session_capa_add_afi(ebuf, capa_aid, 0);
3032 else if (capa_code == CAPA_EXT_NEXTHOP)
3033 session_capa_add_ext_nh(ebuf, capa_aid);
3034 else if (capa_len > 0)
3035 ibuf_add_zero(ebuf, capa_len);
3036
3037 session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf);
3038 ibuf_free(ebuf);
3039 return (-1);
3040 }
3041
3042 void
session_dispatch_imsg(struct imsgbuf * imsgbuf,int idx,u_int * listener_cnt)3043 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt)
3044 {
3045 struct imsg imsg;
3046 struct ibuf ibuf;
3047 struct mrt xmrt;
3048 struct route_refresh rr;
3049 struct mrt *mrt;
3050 struct imsgbuf *i;
3051 struct peer *p;
3052 struct listen_addr *la, *next, nla;
3053 struct session_dependon sdon;
3054 struct bgpd_config tconf;
3055 uint32_t peerid;
3056 int n, fd, depend_ok, restricted;
3057 uint16_t t;
3058 uint8_t aid, errcode, subcode;
3059
3060 while (imsgbuf) {
3061 if ((n = imsg_get(imsgbuf, &imsg)) == -1)
3062 fatal("session_dispatch_imsg: imsg_get error");
3063
3064 if (n == 0)
3065 break;
3066
3067 peerid = imsg_get_id(&imsg);
3068 switch (imsg_get_type(&imsg)) {
3069 case IMSG_SOCKET_CONN:
3070 case IMSG_SOCKET_CONN_CTL:
3071 if (idx != PFD_PIPE_MAIN)
3072 fatalx("reconf request not from parent");
3073 if ((fd = imsg_get_fd(&imsg)) == -1) {
3074 log_warnx("expected to receive imsg fd to "
3075 "RDE but didn't receive any");
3076 break;
3077 }
3078 if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
3079 fatal(NULL);
3080 if (imsgbuf_init(i, fd) == -1 ||
3081 imsgbuf_set_maxsize(i, MAX_BGPD_IMSGSIZE) == -1)
3082 fatal(NULL);
3083 if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) {
3084 if (ibuf_rde) {
3085 log_warnx("Unexpected imsg connection "
3086 "to RDE received");
3087 imsgbuf_clear(ibuf_rde);
3088 free(ibuf_rde);
3089 }
3090 ibuf_rde = i;
3091 } else {
3092 if (ibuf_rde_ctl) {
3093 log_warnx("Unexpected imsg ctl "
3094 "connection to RDE received");
3095 imsgbuf_clear(ibuf_rde_ctl);
3096 free(ibuf_rde_ctl);
3097 }
3098 ibuf_rde_ctl = i;
3099 }
3100 break;
3101 case IMSG_RECONF_CONF:
3102 if (idx != PFD_PIPE_MAIN)
3103 fatalx("reconf request not from parent");
3104 if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1)
3105 fatal("imsg_get_data");
3106
3107 nconf = new_config();
3108 copy_config(nconf, &tconf);
3109 pending_reconf = 1;
3110 break;
3111 case IMSG_RECONF_PEER:
3112 if (idx != PFD_PIPE_MAIN)
3113 fatalx("reconf request not from parent");
3114 if ((p = calloc(1, sizeof(struct peer))) == NULL)
3115 fatal("new_peer");
3116 if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) ==
3117 -1)
3118 fatal("imsg_get_data");
3119 p->state = p->prev_state = STATE_NONE;
3120 p->reconf_action = RECONF_REINIT;
3121 if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
3122 fatalx("%s: peer tree is corrupt", __func__);
3123 break;
3124 case IMSG_RECONF_PEER_AUTH:
3125 if (idx != PFD_PIPE_MAIN)
3126 fatalx("reconf request not from parent");
3127 if ((p = getpeerbyid(nconf, peerid)) == NULL) {
3128 log_warnx("no such peer: id=%u", peerid);
3129 break;
3130 }
3131 if (pfkey_recv_conf(p, &imsg) == -1)
3132 fatal("pfkey_recv_conf");
3133 break;
3134 case IMSG_RECONF_LISTENER:
3135 if (idx != PFD_PIPE_MAIN)
3136 fatalx("reconf request not from parent");
3137 if (nconf == NULL)
3138 fatalx("IMSG_RECONF_LISTENER but no config");
3139 if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1)
3140 fatal("imsg_get_data");
3141 TAILQ_FOREACH(la, conf->listen_addrs, entry)
3142 if (!la_cmp(la, &nla))
3143 break;
3144
3145 if (la == NULL) {
3146 if (nla.reconf != RECONF_REINIT)
3147 fatalx("king bula sez: "
3148 "expected REINIT");
3149
3150 if ((nla.fd = imsg_get_fd(&imsg)) == -1)
3151 log_warnx("expected to receive fd for "
3152 "%s but didn't receive any",
3153 log_sockaddr((struct sockaddr *)
3154 &nla.sa, nla.sa_len));
3155
3156 la = calloc(1, sizeof(struct listen_addr));
3157 if (la == NULL)
3158 fatal(NULL);
3159 memcpy(&la->sa, &nla.sa, sizeof(la->sa));
3160 la->flags = nla.flags;
3161 la->fd = nla.fd;
3162 la->reconf = RECONF_REINIT;
3163 TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3164 entry);
3165 } else {
3166 if (nla.reconf != RECONF_KEEP)
3167 fatalx("king bula sez: expected KEEP");
3168 la->reconf = RECONF_KEEP;
3169 }
3170
3171 break;
3172 case IMSG_RECONF_CTRL:
3173 if (idx != PFD_PIPE_MAIN)
3174 fatalx("reconf request not from parent");
3175
3176 if (imsg_get_data(&imsg, &restricted,
3177 sizeof(restricted)) == -1)
3178 fatal("imsg_get_data");
3179 if ((fd = imsg_get_fd(&imsg)) == -1) {
3180 log_warnx("expected to receive fd for control "
3181 "socket but didn't receive any");
3182 break;
3183 }
3184 if (restricted) {
3185 control_shutdown(rcsock);
3186 rcsock = fd;
3187 } else {
3188 control_shutdown(csock);
3189 csock = fd;
3190 }
3191 break;
3192 case IMSG_RECONF_DRAIN:
3193 switch (idx) {
3194 case PFD_PIPE_ROUTE:
3195 if (nconf != NULL)
3196 fatalx("got unexpected %s from RDE",
3197 "IMSG_RECONF_DONE");
3198 imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3199 -1, NULL, 0);
3200 break;
3201 case PFD_PIPE_MAIN:
3202 if (nconf == NULL)
3203 fatalx("got unexpected %s from parent",
3204 "IMSG_RECONF_DONE");
3205 imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3206 -1, NULL, 0);
3207 break;
3208 default:
3209 fatalx("reconf request not from parent or RDE");
3210 }
3211 break;
3212 case IMSG_RECONF_DONE:
3213 if (idx != PFD_PIPE_MAIN)
3214 fatalx("reconf request not from parent");
3215 if (nconf == NULL)
3216 fatalx("got IMSG_RECONF_DONE but no config");
3217 copy_config(conf, nconf);
3218 merge_peers(conf, nconf);
3219
3220 /* delete old listeners */
3221 TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry,
3222 next) {
3223 if (la->reconf == RECONF_NONE) {
3224 log_info("not listening on %s any more",
3225 log_sockaddr((struct sockaddr *)
3226 &la->sa, la->sa_len));
3227 TAILQ_REMOVE(conf->listen_addrs, la,
3228 entry);
3229 close(la->fd);
3230 free(la);
3231 }
3232 }
3233
3234 /* add new listeners */
3235 TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3236 entry);
3237
3238 setup_listeners(listener_cnt);
3239 free_config(nconf);
3240 nconf = NULL;
3241 pending_reconf = 0;
3242 log_info("SE reconfigured");
3243 /*
3244 * IMSG_RECONF_DONE is sent when the RDE drained
3245 * the peer config sent in merge_peers().
3246 */
3247 break;
3248 case IMSG_SESSION_DEPENDON:
3249 if (idx != PFD_PIPE_MAIN)
3250 fatalx("IFINFO message not from parent");
3251 if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1)
3252 fatalx("DEPENDON imsg with wrong len");
3253 depend_ok = sdon.depend_state;
3254
3255 RB_FOREACH(p, peer_head, &conf->peers)
3256 if (!strcmp(p->conf.if_depend, sdon.ifname)) {
3257 if (depend_ok && !p->depend_ok) {
3258 p->depend_ok = depend_ok;
3259 bgp_fsm(p, EVNT_START, NULL);
3260 } else if (!depend_ok && p->depend_ok) {
3261 p->depend_ok = depend_ok;
3262 session_stop(p,
3263 ERR_CEASE_OTHER_CHANGE,
3264 NULL);
3265 }
3266 }
3267 break;
3268 case IMSG_MRT_OPEN:
3269 case IMSG_MRT_REOPEN:
3270 if (idx != PFD_PIPE_MAIN)
3271 fatalx("mrt request not from parent");
3272 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3273 log_warnx("mrt open, wrong imsg len");
3274 break;
3275 }
3276
3277 if ((xmrt.fd = imsg_get_fd(&imsg)) == -1) {
3278 log_warnx("expected to receive fd for mrt dump "
3279 "but didn't receive any");
3280 break;
3281 }
3282
3283 mrt = mrt_get(&mrthead, &xmrt);
3284 if (mrt == NULL) {
3285 /* new dump */
3286 mrt = calloc(1, sizeof(struct mrt));
3287 if (mrt == NULL)
3288 fatal("session_dispatch_imsg");
3289 memcpy(mrt, &xmrt, sizeof(struct mrt));
3290 if ((mrt->wbuf = msgbuf_new()) == NULL)
3291 fatal("session_dispatch_imsg");
3292 LIST_INSERT_HEAD(&mrthead, mrt, entry);
3293 } else {
3294 /* old dump reopened */
3295 close(mrt->fd);
3296 }
3297 mrt->fd = xmrt.fd;
3298 break;
3299 case IMSG_MRT_CLOSE:
3300 if (idx != PFD_PIPE_MAIN)
3301 fatalx("mrt request not from parent");
3302 if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3303 log_warnx("mrt close, wrong imsg len");
3304 break;
3305 }
3306
3307 mrt = mrt_get(&mrthead, &xmrt);
3308 if (mrt != NULL)
3309 mrt_done(mrt);
3310 break;
3311 case IMSG_CTL_KROUTE:
3312 case IMSG_CTL_KROUTE_ADDR:
3313 case IMSG_CTL_SHOW_NEXTHOP:
3314 case IMSG_CTL_SHOW_INTERFACE:
3315 case IMSG_CTL_SHOW_FIB_TABLES:
3316 case IMSG_CTL_SHOW_RTR:
3317 case IMSG_CTL_SHOW_TIMER:
3318 if (idx != PFD_PIPE_MAIN)
3319 fatalx("ctl kroute request not from parent");
3320 control_imsg_relay(&imsg, NULL);
3321 break;
3322 case IMSG_CTL_SHOW_NEIGHBOR:
3323 if (idx != PFD_PIPE_ROUTE_CTL)
3324 fatalx("ctl rib request not from RDE");
3325 p = getpeerbyid(conf, peerid);
3326 control_imsg_relay(&imsg, p);
3327 break;
3328 case IMSG_CTL_SHOW_RIB:
3329 case IMSG_CTL_SHOW_RIB_PREFIX:
3330 case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3331 case IMSG_CTL_SHOW_RIB_ATTR:
3332 case IMSG_CTL_SHOW_RIB_MEM:
3333 case IMSG_CTL_SHOW_NETWORK:
3334 case IMSG_CTL_SHOW_FLOWSPEC:
3335 case IMSG_CTL_SHOW_SET:
3336 if (idx != PFD_PIPE_ROUTE_CTL)
3337 fatalx("ctl rib request not from RDE");
3338 control_imsg_relay(&imsg, NULL);
3339 break;
3340 case IMSG_CTL_END:
3341 case IMSG_CTL_RESULT:
3342 control_imsg_relay(&imsg, NULL);
3343 break;
3344 case IMSG_UPDATE:
3345 if (idx != PFD_PIPE_ROUTE)
3346 fatalx("update request not from RDE");
3347 if (imsg_get_ibuf(&imsg, &ibuf) == -1)
3348 log_warn("RDE sent invalid update");
3349 else
3350 session_update(peerid, &ibuf);
3351 break;
3352 case IMSG_UPDATE_ERR:
3353 if (idx != PFD_PIPE_ROUTE)
3354 fatalx("update request not from RDE");
3355 if ((p = getpeerbyid(conf, peerid)) == NULL) {
3356 log_warnx("no such peer: id=%u", peerid);
3357 break;
3358 }
3359 if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3360 ibuf_get_n8(&ibuf, &errcode) == -1 ||
3361 ibuf_get_n8(&ibuf, &subcode) == -1) {
3362 log_warnx("RDE sent invalid notification");
3363 break;
3364 }
3365
3366 session_notification(p, errcode, subcode, &ibuf);
3367 switch (errcode) {
3368 case ERR_CEASE:
3369 switch (subcode) {
3370 case ERR_CEASE_MAX_PREFIX:
3371 case ERR_CEASE_MAX_SENT_PREFIX:
3372 t = p->conf.max_out_prefix_restart;
3373 if (subcode == ERR_CEASE_MAX_PREFIX)
3374 t = p->conf.max_prefix_restart;
3375
3376 bgp_fsm(p, EVNT_STOP, NULL);
3377 if (t)
3378 timer_set(&p->timers,
3379 Timer_IdleHold, 60 * t);
3380 break;
3381 default:
3382 bgp_fsm(p, EVNT_CON_FATAL, NULL);
3383 break;
3384 }
3385 break;
3386 default:
3387 bgp_fsm(p, EVNT_CON_FATAL, NULL);
3388 break;
3389 }
3390 break;
3391 case IMSG_REFRESH:
3392 if (idx != PFD_PIPE_ROUTE)
3393 fatalx("route refresh request not from RDE");
3394 if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) {
3395 log_warnx("RDE sent invalid refresh msg");
3396 break;
3397 }
3398 if ((p = getpeerbyid(conf, peerid)) == NULL) {
3399 log_warnx("no such peer: id=%u", peerid);
3400 break;
3401 }
3402 if (rr.aid < AID_MIN || rr.aid >= AID_MAX)
3403 fatalx("IMSG_REFRESH: bad AID");
3404 session_rrefresh(p, rr.aid, rr.subtype);
3405 break;
3406 case IMSG_SESSION_RESTARTED:
3407 if (idx != PFD_PIPE_ROUTE)
3408 fatalx("session restart not from RDE");
3409 if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) {
3410 log_warnx("RDE sent invalid restart msg");
3411 break;
3412 }
3413 if ((p = getpeerbyid(conf, peerid)) == NULL) {
3414 log_warnx("no such peer: id=%u", peerid);
3415 break;
3416 }
3417 if (aid < AID_MIN || aid >= AID_MAX)
3418 fatalx("IMSG_SESSION_RESTARTED: bad AID");
3419 if (p->capa.neg.grestart.flags[aid] &
3420 CAPA_GR_RESTARTING) {
3421 log_peer_warnx(&p->conf,
3422 "graceful restart of %s finished",
3423 aid2str(aid));
3424 p->capa.neg.grestart.flags[aid] &=
3425 ~CAPA_GR_RESTARTING;
3426 timer_stop(&p->timers, Timer_RestartTimeout);
3427
3428 /* signal back to RDE to cleanup stale routes */
3429 if (imsg_rde(IMSG_SESSION_RESTARTED,
3430 peerid, &aid, sizeof(aid)) == -1)
3431 fatal("imsg_compose: "
3432 "IMSG_SESSION_RESTARTED");
3433 }
3434 break;
3435 default:
3436 break;
3437 }
3438 imsg_free(&imsg);
3439 }
3440 }
3441
3442 int
la_cmp(struct listen_addr * a,struct listen_addr * b)3443 la_cmp(struct listen_addr *a, struct listen_addr *b)
3444 {
3445 struct sockaddr_in *in_a, *in_b;
3446 struct sockaddr_in6 *in6_a, *in6_b;
3447
3448 if (a->sa.ss_family != b->sa.ss_family)
3449 return (1);
3450
3451 switch (a->sa.ss_family) {
3452 case AF_INET:
3453 in_a = (struct sockaddr_in *)&a->sa;
3454 in_b = (struct sockaddr_in *)&b->sa;
3455 if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3456 return (1);
3457 if (in_a->sin_port != in_b->sin_port)
3458 return (1);
3459 break;
3460 case AF_INET6:
3461 in6_a = (struct sockaddr_in6 *)&a->sa;
3462 in6_b = (struct sockaddr_in6 *)&b->sa;
3463 if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3464 sizeof(struct in6_addr)))
3465 return (1);
3466 if (in6_a->sin6_port != in6_b->sin6_port)
3467 return (1);
3468 break;
3469 default:
3470 fatal("king bula sez: unknown address family");
3471 /* NOTREACHED */
3472 }
3473
3474 return (0);
3475 }
3476
3477 struct peer *
getpeerbydesc(struct bgpd_config * c,const char * descr)3478 getpeerbydesc(struct bgpd_config *c, const char *descr)
3479 {
3480 struct peer *p, *res = NULL;
3481 int match = 0;
3482
3483 RB_FOREACH(p, peer_head, &c->peers)
3484 if (!strcmp(p->conf.descr, descr)) {
3485 res = p;
3486 match++;
3487 }
3488
3489 if (match > 1)
3490 log_info("neighbor description \"%s\" not unique, request "
3491 "aborted", descr);
3492
3493 if (match == 1)
3494 return (res);
3495 else
3496 return (NULL);
3497 }
3498
3499 struct peer *
getpeerbyip(struct bgpd_config * c,struct sockaddr * ip)3500 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3501 {
3502 struct bgpd_addr addr;
3503 struct peer *p, *newpeer, *loose = NULL;
3504 uint32_t id;
3505
3506 sa2addr(ip, &addr, NULL);
3507
3508 /* we might want a more effective way to find peers by IP */
3509 RB_FOREACH(p, peer_head, &c->peers)
3510 if (!p->conf.template &&
3511 !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3512 return (p);
3513
3514 /* try template matching */
3515 RB_FOREACH(p, peer_head, &c->peers)
3516 if (p->conf.template &&
3517 p->conf.remote_addr.aid == addr.aid &&
3518 session_match_mask(p, &addr))
3519 if (loose == NULL || loose->conf.remote_masklen <
3520 p->conf.remote_masklen)
3521 loose = p;
3522
3523 if (loose != NULL) {
3524 /* clone */
3525 if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3526 fatal(NULL);
3527 memcpy(newpeer, loose, sizeof(struct peer));
3528 for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3529 if (getpeerbyid(c, id) == NULL) /* we found a free id */
3530 break;
3531 }
3532 newpeer->template = loose;
3533 session_template_clone(newpeer, ip, id, 0);
3534 newpeer->state = newpeer->prev_state = STATE_NONE;
3535 newpeer->reconf_action = RECONF_KEEP;
3536 newpeer->rpending = 0;
3537 newpeer->wbuf = NULL;
3538 init_peer(newpeer);
3539 /* start delete timer, it is stopped when session goes up. */
3540 timer_set(&newpeer->timers, Timer_SessionDown,
3541 INTERVAL_SESSION_DOWN);
3542 bgp_fsm(newpeer, EVNT_START, NULL);
3543 if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3544 fatalx("%s: peer tree is corrupt", __func__);
3545 return (newpeer);
3546 }
3547
3548 return (NULL);
3549 }
3550
3551 struct peer *
getpeerbyid(struct bgpd_config * c,uint32_t peerid)3552 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3553 {
3554 static struct peer lookup;
3555
3556 lookup.conf.id = peerid;
3557
3558 return RB_FIND(peer_head, &c->peers, &lookup);
3559 }
3560
3561 int
peer_matched(struct peer * p,struct ctl_neighbor * n)3562 peer_matched(struct peer *p, struct ctl_neighbor *n)
3563 {
3564 char *s;
3565
3566 if (n && n->addr.aid) {
3567 if (memcmp(&p->conf.remote_addr, &n->addr,
3568 sizeof(p->conf.remote_addr)))
3569 return 0;
3570 } else if (n && n->descr[0]) {
3571 s = n->is_group ? p->conf.group : p->conf.descr;
3572 /* cannot trust n->descr to be properly terminated */
3573 if (strncmp(s, n->descr, sizeof(n->descr)))
3574 return 0;
3575 }
3576 return 1;
3577 }
3578
3579 void
session_template_clone(struct peer * p,struct sockaddr * ip,uint32_t id,uint32_t as)3580 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3581 uint32_t as)
3582 {
3583 struct bgpd_addr remote_addr;
3584
3585 if (ip)
3586 sa2addr(ip, &remote_addr, NULL);
3587 else
3588 memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3589
3590 memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3591
3592 p->conf.id = id;
3593
3594 if (as) {
3595 p->conf.remote_as = as;
3596 p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3597 if (!p->conf.ebgp)
3598 /* force enforce_as off for iBGP sessions */
3599 p->conf.enforce_as = ENFORCE_AS_OFF;
3600 }
3601
3602 memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3603 switch (p->conf.remote_addr.aid) {
3604 case AID_INET:
3605 p->conf.remote_masklen = 32;
3606 break;
3607 case AID_INET6:
3608 p->conf.remote_masklen = 128;
3609 break;
3610 }
3611 p->conf.template = 0;
3612 }
3613
3614 int
session_match_mask(struct peer * p,struct bgpd_addr * a)3615 session_match_mask(struct peer *p, struct bgpd_addr *a)
3616 {
3617 struct bgpd_addr masked;
3618
3619 applymask(&masked, a, p->conf.remote_masklen);
3620 if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3621 return (1);
3622 return (0);
3623 }
3624
3625 void
session_down(struct peer * peer)3626 session_down(struct peer *peer)
3627 {
3628 memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3629 peer->stats.last_updown = getmonotime();
3630
3631 timer_set(&peer->timers, Timer_SessionDown, INTERVAL_SESSION_DOWN);
3632
3633 /*
3634 * session_down is called in the exit code path so check
3635 * if the RDE is still around, if not there is no need to
3636 * send the message.
3637 */
3638 if (ibuf_rde == NULL)
3639 return;
3640 if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3641 fatalx("imsg_compose error");
3642 }
3643
3644 void
session_up(struct peer * p)3645 session_up(struct peer *p)
3646 {
3647 struct session_up sup;
3648
3649 /* clear last errors, now that the session is up */
3650 p->stats.last_sent_errcode = 0;
3651 p->stats.last_sent_suberr = 0;
3652 p->stats.last_rcvd_errcode = 0;
3653 p->stats.last_rcvd_suberr = 0;
3654 memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason));
3655
3656 timer_stop(&p->timers, Timer_SessionDown);
3657
3658 if (!p->rdesession) {
3659 /* inform rde about new peer */
3660 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3661 &p->conf, sizeof(p->conf)) == -1)
3662 fatalx("imsg_compose error");
3663 p->rdesession = 1;
3664 }
3665
3666 if (p->local.aid == AID_INET) {
3667 sup.local_v4_addr = p->local;
3668 sup.local_v6_addr = p->local_alt;
3669 } else {
3670 sup.local_v6_addr = p->local;
3671 sup.local_v4_addr = p->local_alt;
3672 }
3673 sup.remote_addr = p->remote;
3674 sup.if_scope = p->if_scope;
3675
3676 sup.remote_bgpid = p->remote_bgpid;
3677 sup.short_as = p->short_as;
3678 memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3679 p->stats.last_updown = getmonotime();
3680 if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3681 fatalx("imsg_compose error");
3682 }
3683
3684 int
imsg_ctl_parent(struct imsg * imsg)3685 imsg_ctl_parent(struct imsg *imsg)
3686 {
3687 return imsg_forward(ibuf_main, imsg);
3688 }
3689
3690 int
imsg_ctl_rde(struct imsg * imsg)3691 imsg_ctl_rde(struct imsg *imsg)
3692 {
3693 if (ibuf_rde_ctl == NULL)
3694 return (0);
3695 /*
3696 * Use control socket to talk to RDE to bypass the queue of the
3697 * regular imsg socket.
3698 */
3699 return imsg_forward(ibuf_rde_ctl, imsg);
3700 }
3701
3702 int
imsg_ctl_rde_msg(int type,uint32_t peerid,pid_t pid)3703 imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid)
3704 {
3705 if (ibuf_rde_ctl == NULL)
3706 return (0);
3707
3708 /*
3709 * Use control socket to talk to RDE to bypass the queue of the
3710 * regular imsg socket.
3711 */
3712 return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0);
3713 }
3714
3715 int
imsg_rde(int type,uint32_t peerid,void * data,uint16_t datalen)3716 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3717 {
3718 if (ibuf_rde == NULL)
3719 return (0);
3720
3721 return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen);
3722 }
3723
3724 void
session_demote(struct peer * p,int level)3725 session_demote(struct peer *p, int level)
3726 {
3727 struct demote_msg msg;
3728
3729 strlcpy(msg.demote_group, p->conf.demote_group,
3730 sizeof(msg.demote_group));
3731 msg.level = level;
3732 if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3733 &msg, sizeof(msg)) == -1)
3734 fatalx("imsg_compose error");
3735
3736 p->demoted += level;
3737 }
3738
3739 void
session_stop(struct peer * peer,uint8_t subcode,const char * reason)3740 session_stop(struct peer *peer, uint8_t subcode, const char *reason)
3741 {
3742 struct ibuf *ibuf;
3743
3744 if (reason != NULL)
3745 strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason));
3746
3747 ibuf = ibuf_dynamic(0, REASON_LEN);
3748
3749 if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3750 subcode == ERR_CEASE_ADMIN_RESET) &&
3751 reason != NULL && *reason != '\0' &&
3752 ibuf != NULL) {
3753 if (ibuf_add_n8(ibuf, strlen(reason)) == -1 ||
3754 ibuf_add(ibuf, reason, strlen(reason))) {
3755 log_peer_warnx(&peer->conf,
3756 "trying to send overly long shutdown reason");
3757 ibuf_free(ibuf);
3758 ibuf = NULL;
3759 }
3760 }
3761 switch (peer->state) {
3762 case STATE_OPENSENT:
3763 case STATE_OPENCONFIRM:
3764 case STATE_ESTABLISHED:
3765 session_notification(peer, ERR_CEASE, subcode, ibuf);
3766 break;
3767 default:
3768 /* session not open, no need to send notification */
3769 if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) ||
3770 suberr_cease_names[subcode] == NULL)
3771 log_peer_warnx(&peer->conf, "session stop: %s, "
3772 "unknown subcode %u", errnames[ERR_CEASE], subcode);
3773 else
3774 log_peer_warnx(&peer->conf, "session stop: %s, %s",
3775 errnames[ERR_CEASE], suberr_cease_names[subcode]);
3776 break;
3777 }
3778 ibuf_free(ibuf);
3779 bgp_fsm(peer, EVNT_STOP, NULL);
3780 }
3781
3782 struct bgpd_addr *
session_localaddr(struct peer * p)3783 session_localaddr(struct peer *p)
3784 {
3785 switch (p->conf.remote_addr.aid) {
3786 case AID_INET:
3787 return &p->conf.local_addr_v4;
3788 case AID_INET6:
3789 return &p->conf.local_addr_v6;
3790 }
3791 fatalx("Unknown AID in %s", __func__);
3792 }
3793
3794 void
merge_peers(struct bgpd_config * c,struct bgpd_config * nc)3795 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3796 {
3797 struct peer *p, *np, *next;
3798
3799 RB_FOREACH(p, peer_head, &c->peers) {
3800 /* templates are handled specially */
3801 if (p->template != NULL)
3802 continue;
3803 np = getpeerbyid(nc, p->conf.id);
3804 if (np == NULL) {
3805 p->reconf_action = RECONF_DELETE;
3806 continue;
3807 }
3808
3809 /* peer no longer uses TCP MD5SIG so deconfigure */
3810 if (p->auth_conf.method == AUTH_MD5SIG &&
3811 np->auth_conf.method != AUTH_MD5SIG)
3812 tcp_md5_del_listener(c, p);
3813 else if (np->auth_conf.method == AUTH_MD5SIG)
3814 tcp_md5_add_listener(c, np);
3815
3816 memcpy(&p->conf, &np->conf, sizeof(p->conf));
3817 memcpy(&p->auth_conf, &np->auth_conf, sizeof(p->auth_conf));
3818 RB_REMOVE(peer_head, &nc->peers, np);
3819 free(np);
3820
3821 p->reconf_action = RECONF_KEEP;
3822
3823 /* had demotion, is demoted, demote removed? */
3824 if (p->demoted && !p->conf.demote_group[0])
3825 session_demote(p, -1);
3826
3827 /* if session is not open then refresh pfkey data */
3828 if (p->state < STATE_OPENSENT && !p->template)
3829 imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3830 p->conf.id, 0, -1, NULL, 0);
3831
3832 /*
3833 * If the session is established or the SessionDown timer is
3834 * running sync with the RDE
3835 */
3836 if (p->rdesession) {
3837 if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3838 &p->conf, sizeof(struct peer_config)) == -1)
3839 fatalx("imsg_compose error");
3840 }
3841
3842 /* apply the config to all clones of a template */
3843 if (p->conf.template) {
3844 struct peer *xp;
3845 RB_FOREACH(xp, peer_head, &c->peers) {
3846 if (xp->template != p)
3847 continue;
3848 session_template_clone(xp, NULL, xp->conf.id,
3849 xp->conf.remote_as);
3850
3851 if (p->rdesession) {
3852 if (imsg_rde(IMSG_SESSION_ADD,
3853 xp->conf.id, &xp->conf,
3854 sizeof(xp->conf)) == -1)
3855 fatalx("imsg_compose error");
3856 }
3857 }
3858 }
3859 }
3860
3861 if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3862 fatalx("imsg_compose error");
3863
3864 /* pfkeys of new peers already loaded by the parent process */
3865 RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3866 RB_REMOVE(peer_head, &nc->peers, np);
3867 if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3868 fatalx("%s: peer tree is corrupt", __func__);
3869 if (np->auth_conf.method == AUTH_MD5SIG)
3870 tcp_md5_add_listener(c, np);
3871 }
3872 }
3873