1 /*	$OpenBSD: session.c,v 1.510 2025/02/06 12:38:58 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  * Copyright (c) 2017 Peter van Dijk <peter.van.dijk@powerdns.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/resource.h>
26 #include <sys/un.h>
27 #include <netinet/in.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 #include <limits.h>
32 
33 #include <err.h>
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <ifaddrs.h>
37 #include <poll.h>
38 #include <pwd.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <syslog.h>
44 #include <unistd.h>
45 
46 #include "bgpd.h"
47 #include "session.h"
48 #include "log.h"
49 
/*
 * Fixed slots at the front of the pollfd array built by session_main().
 * The listening sockets are placed right behind the last fixed slot.
 */
#define PFD_PIPE_MAIN		0
#define PFD_PIPE_ROUTE		1
#define PFD_PIPE_ROUTE_CTL	2
#define PFD_SOCK_CTL		3
#define PFD_SOCK_RCTL		4
#define PFD_LISTENERS_START	(PFD_SOCK_RCTL + 1)
56 
57 void	session_sighdlr(int);
58 int	setup_listeners(u_int *);
59 void	init_peer(struct peer *);
60 void	start_timer_holdtime(struct peer *);
61 void	start_timer_sendholdtime(struct peer *);
62 void	start_timer_keepalive(struct peer *);
63 void	session_close_connection(struct peer *);
64 void	change_state(struct peer *, enum session_state, enum session_events);
65 int	session_setup_socket(struct peer *);
66 void	session_accept(int);
67 int	session_connect(struct peer *);
68 void	session_tcp_established(struct peer *);
69 int	session_capa_add(struct ibuf *, uint8_t, uint8_t);
70 struct ibuf	*session_newmsg(enum msg_type, uint16_t);
71 void	session_sendmsg(struct ibuf *, struct peer *, enum msg_type);
72 void	session_open(struct peer *);
73 void	session_keepalive(struct peer *);
74 void	session_update(uint32_t, struct ibuf *);
75 void	session_notification(struct peer *, uint8_t, uint8_t, struct ibuf *);
76 void	session_notification_data(struct peer *, uint8_t, uint8_t, void *,
77 	    size_t);
78 void	session_rrefresh(struct peer *, uint8_t, uint8_t);
79 int	session_graceful_restart(struct peer *);
80 int	session_graceful_stop(struct peer *);
81 int	session_dispatch_msg(struct pollfd *, struct peer *);
82 void	session_process_msg(struct peer *);
83 struct ibuf	*parse_header(struct ibuf *, void *, int *);
84 int	parse_open(struct peer *, struct ibuf *);
85 int	parse_update(struct peer *, struct ibuf *);
86 int	parse_rrefresh(struct peer *, struct ibuf *);
87 void	parse_notification(struct peer *, struct ibuf *);
88 int	parse_capabilities(struct peer *, struct ibuf *, uint32_t *);
89 int	capa_neg_calc(struct peer *);
90 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
91 void	session_up(struct peer *);
92 void	session_down(struct peer *);
93 int	imsg_rde(int, uint32_t, void *, uint16_t);
94 void	session_demote(struct peer *, int);
95 void	merge_peers(struct bgpd_config *, struct bgpd_config *);
96 
97 int		 la_cmp(struct listen_addr *, struct listen_addr *);
98 void		 session_template_clone(struct peer *, struct sockaddr *,
99 		    uint32_t, uint32_t);
100 int		 session_match_mask(struct peer *, struct bgpd_addr *);
101 
102 static struct bgpd_config	*conf, *nconf;
103 static struct imsgbuf		*ibuf_rde;
104 static struct imsgbuf		*ibuf_rde_ctl;
105 static struct imsgbuf		*ibuf_main;
106 
107 struct bgpd_sysdep	 sysdep;
108 volatile sig_atomic_t	 session_quit;
109 int			 pending_reconf;
110 int			 csock = -1, rcsock = -1;
111 u_int			 peer_cnt;
112 
113 struct mrt_head		 mrthead;
114 time_t			 pauseaccept;
115 
116 static const uint8_t	 marker[MSGSIZE_HEADER_MARKER] = {
117 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
118 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
119 };
120 
121 static inline int
peer_compare(const struct peer * a,const struct peer * b)122 peer_compare(const struct peer *a, const struct peer *b)
123 {
124 	return a->conf.id - b->conf.id;
125 }
126 
127 RB_GENERATE(peer_head, peer, entry, peer_compare);
128 
129 void
session_sighdlr(int sig)130 session_sighdlr(int sig)
131 {
132 	switch (sig) {
133 	case SIGINT:
134 	case SIGTERM:
135 		session_quit = 1;
136 		break;
137 	}
138 }
139 
140 int
setup_listeners(u_int * la_cnt)141 setup_listeners(u_int *la_cnt)
142 {
143 	int			 ttl = 255;
144 	struct listen_addr	*la;
145 	u_int			 cnt = 0;
146 
147 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
148 		la->reconf = RECONF_NONE;
149 		cnt++;
150 
151 		if (la->flags & LISTENER_LISTENING)
152 			continue;
153 
154 		if (la->fd == -1) {
155 			log_warn("cannot establish listener on %s: invalid fd",
156 			    log_sockaddr((struct sockaddr *)&la->sa,
157 			    la->sa_len));
158 			continue;
159 		}
160 
161 		if (tcp_md5_prep_listener(la, &conf->peers) == -1)
162 			fatal("tcp_md5_prep_listener");
163 
164 		/* set ttl to 255 so that ttl-security works */
165 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
166 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
167 			log_warn("setup_listeners setsockopt TTL");
168 			continue;
169 		}
170 		if (la->sa.ss_family == AF_INET6 && setsockopt(la->fd,
171 		    IPPROTO_IPV6, IPV6_UNICAST_HOPS, &ttl, sizeof(ttl)) == -1) {
172 			log_warn("setup_listeners setsockopt hoplimit");
173 			continue;
174 		}
175 
176 		if (listen(la->fd, MAX_BACKLOG)) {
177 			close(la->fd);
178 			fatal("listen");
179 		}
180 
181 		la->flags |= LISTENER_LISTENING;
182 
183 		log_info("listening on %s",
184 		    log_sockaddr((struct sockaddr *)&la->sa, la->sa_len));
185 	}
186 
187 	*la_cnt = cnt;
188 
189 	return (0);
190 }
191 
192 void
session_main(int debug,int verbose)193 session_main(int debug, int verbose)
194 {
195 	int			 timeout;
196 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
197 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
198 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
199 	u_int			 new_cnt;
200 	struct passwd		*pw;
201 	struct peer		*p, **peer_l = NULL, *next;
202 	struct mrt		*m, *xm, **mrt_l = NULL;
203 	struct pollfd		*pfd = NULL;
204 	struct listen_addr	*la;
205 	void			*newp;
206 	time_t			 now;
207 	short			 events;
208 
209 	log_init(debug, LOG_DAEMON);
210 	log_setverbose(verbose);
211 
212 	log_procinit(log_procnames[PROC_SE]);
213 
214 	if ((pw = getpwnam(BGPD_USER)) == NULL)
215 		fatal(NULL);
216 
217 	if (chroot(pw->pw_dir) == -1)
218 		fatal("chroot");
219 	if (chdir("/") == -1)
220 		fatal("chdir(\"/\")");
221 
222 	setproctitle("session engine");
223 
224 	if (setgroups(1, &pw->pw_gid) ||
225 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
226 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
227 		fatal("can't drop privileges");
228 
229 	if (pledge("stdio inet recvfd", NULL) == -1)
230 		fatal("pledge");
231 
232 	signal(SIGTERM, session_sighdlr);
233 	signal(SIGINT, session_sighdlr);
234 	signal(SIGPIPE, SIG_IGN);
235 	signal(SIGHUP, SIG_IGN);
236 	signal(SIGALRM, SIG_IGN);
237 	signal(SIGUSR1, SIG_IGN);
238 
239 	if ((ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
240 		fatal(NULL);
241 	if (imsgbuf_init(ibuf_main, 3) == -1 ||
242 	    imsgbuf_set_maxsize(ibuf_main, MAX_BGPD_IMSGSIZE) == -1)
243 		fatal(NULL);
244 	imsgbuf_allow_fdpass(ibuf_main);
245 
246 	LIST_INIT(&mrthead);
247 	listener_cnt = 0;
248 	peer_cnt = 0;
249 	ctl_cnt = 0;
250 
251 	conf = new_config();
252 	log_info("session engine ready");
253 
254 	while (session_quit == 0) {
255 		/* check for peers to be initialized or deleted */
256 		if (!pending_reconf) {
257 			RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
258 				/* new peer that needs init? */
259 				if (p->state == STATE_NONE)
260 					init_peer(p);
261 
262 				/* deletion due? */
263 				if (p->reconf_action == RECONF_DELETE) {
264 					if (p->demoted)
265 						session_demote(p, -1);
266 					p->conf.demote_group[0] = 0;
267 					session_stop(p, ERR_CEASE_PEER_UNCONF,
268 					    NULL);
269 					timer_remove_all(&p->timers);
270 					tcp_md5_del_listener(conf, p);
271 					if (imsg_rde(IMSG_SESSION_DELETE,
272 					    p->conf.id, NULL, 0) == -1)
273 						fatalx("imsg_compose error");
274 					msgbuf_free(p->wbuf);
275 					RB_REMOVE(peer_head, &conf->peers, p);
276 					log_peer_warnx(&p->conf, "removed");
277 					free(p);
278 					peer_cnt--;
279 					continue;
280 				}
281 				p->reconf_action = RECONF_NONE;
282 			}
283 		}
284 
285 		if (peer_cnt > peer_l_elms) {
286 			if ((newp = reallocarray(peer_l, peer_cnt,
287 			    sizeof(struct peer *))) == NULL) {
288 				/* panic for now */
289 				log_warn("could not resize peer_l from %u -> %u"
290 				    " entries", peer_l_elms, peer_cnt);
291 				fatalx("exiting");
292 			}
293 			peer_l = newp;
294 			peer_l_elms = peer_cnt;
295 		}
296 
297 		mrt_cnt = 0;
298 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
299 			xm = LIST_NEXT(m, entry);
300 			if (m->state == MRT_STATE_REMOVE) {
301 				mrt_clean(m);
302 				LIST_REMOVE(m, entry);
303 				free(m);
304 				continue;
305 			}
306 			if (msgbuf_queuelen(m->wbuf) > 0)
307 				mrt_cnt++;
308 		}
309 
310 		if (mrt_cnt > mrt_l_elms) {
311 			if ((newp = reallocarray(mrt_l, mrt_cnt,
312 			    sizeof(struct mrt *))) == NULL) {
313 				/* panic for now */
314 				log_warn("could not resize mrt_l from %u -> %u"
315 				    " entries", mrt_l_elms, mrt_cnt);
316 				fatalx("exiting");
317 			}
318 			mrt_l = newp;
319 			mrt_l_elms = mrt_cnt;
320 		}
321 
322 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
323 		    ctl_cnt + mrt_cnt;
324 		if (new_cnt > pfd_elms) {
325 			if ((newp = reallocarray(pfd, new_cnt,
326 			    sizeof(struct pollfd))) == NULL) {
327 				/* panic for now */
328 				log_warn("could not resize pfd from %u -> %u"
329 				    " entries", pfd_elms, new_cnt);
330 				fatalx("exiting");
331 			}
332 			pfd = newp;
333 			pfd_elms = new_cnt;
334 		}
335 
336 		memset(pfd, 0, sizeof(struct pollfd) * pfd_elms);
337 
338 		set_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main);
339 		set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
340 		set_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl);
341 
342 		if (pauseaccept == 0) {
343 			pfd[PFD_SOCK_CTL].fd = csock;
344 			pfd[PFD_SOCK_CTL].events = POLLIN;
345 			pfd[PFD_SOCK_RCTL].fd = rcsock;
346 			pfd[PFD_SOCK_RCTL].events = POLLIN;
347 		} else {
348 			pfd[PFD_SOCK_CTL].fd = -1;
349 			pfd[PFD_SOCK_RCTL].fd = -1;
350 		}
351 
352 		i = PFD_LISTENERS_START;
353 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
354 			if (pauseaccept == 0) {
355 				pfd[i].fd = la->fd;
356 				pfd[i].events = POLLIN;
357 			} else
358 				pfd[i].fd = -1;
359 			i++;
360 		}
361 		idx_listeners = i;
362 		timeout = 240;	/* loop every 240s at least */
363 
364 		now = getmonotime();
365 		RB_FOREACH(p, peer_head, &conf->peers) {
366 			time_t	nextaction;
367 			struct timer *pt;
368 
369 			/* check timers */
370 			if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
371 				switch (pt->type) {
372 				case Timer_Hold:
373 					bgp_fsm(p, EVNT_TIMER_HOLDTIME, NULL);
374 					break;
375 				case Timer_SendHold:
376 					bgp_fsm(p, EVNT_TIMER_SENDHOLD, NULL);
377 					break;
378 				case Timer_ConnectRetry:
379 					bgp_fsm(p, EVNT_TIMER_CONNRETRY, NULL);
380 					break;
381 				case Timer_Keepalive:
382 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE, NULL);
383 					break;
384 				case Timer_IdleHold:
385 					bgp_fsm(p, EVNT_START, NULL);
386 					break;
387 				case Timer_IdleHoldReset:
388 					p->IdleHoldTime =
389 					    INTERVAL_IDLE_HOLD_INITIAL;
390 					p->errcnt = 0;
391 					timer_stop(&p->timers,
392 					    Timer_IdleHoldReset);
393 					break;
394 				case Timer_CarpUndemote:
395 					timer_stop(&p->timers,
396 					    Timer_CarpUndemote);
397 					if (p->demoted &&
398 					    p->state == STATE_ESTABLISHED)
399 						session_demote(p, -1);
400 					break;
401 				case Timer_RestartTimeout:
402 					timer_stop(&p->timers,
403 					    Timer_RestartTimeout);
404 					session_graceful_stop(p);
405 					break;
406 				case Timer_SessionDown:
407 					timer_stop(&p->timers,
408 					    Timer_SessionDown);
409 
410 					if (imsg_rde(IMSG_SESSION_DELETE,
411 					    p->conf.id, NULL, 0) == -1)
412 						fatalx("imsg_compose error");
413 					p->rdesession = 0;
414 
415 					/* finally delete this cloned peer */
416 					if (p->template)
417 						p->reconf_action =
418 						    RECONF_DELETE;
419 					break;
420 				default:
421 					fatalx("King Bula lost in time");
422 				}
423 			}
424 			if ((nextaction = timer_nextduein(&p->timers,
425 			    now)) != -1 && nextaction < timeout)
426 				timeout = nextaction;
427 
428 			/* are we waiting for a write? */
429 			events = POLLIN;
430 			if (msgbuf_queuelen(p->wbuf) > 0 ||
431 			    p->state == STATE_CONNECT)
432 				events |= POLLOUT;
433 			/* is there still work to do? */
434 			if (p->rpending)
435 				timeout = 0;
436 
437 			/* poll events */
438 			if (p->fd != -1 && events != 0) {
439 				pfd[i].fd = p->fd;
440 				pfd[i].events = events;
441 				peer_l[i - idx_listeners] = p;
442 				i++;
443 			}
444 		}
445 
446 		idx_peers = i;
447 
448 		LIST_FOREACH(m, &mrthead, entry)
449 			if (msgbuf_queuelen(m->wbuf) > 0) {
450 				pfd[i].fd = m->fd;
451 				pfd[i].events = POLLOUT;
452 				mrt_l[i - idx_peers] = m;
453 				i++;
454 			}
455 
456 		idx_mrts = i;
457 
458 		i += control_fill_pfds(pfd + i, pfd_elms -i);
459 
460 		if (i > pfd_elms)
461 			fatalx("poll pfd overflow");
462 
463 		if (pauseaccept && timeout > 1)
464 			timeout = 1;
465 		if (timeout < 0)
466 			timeout = 0;
467 		if (poll(pfd, i, timeout * 1000) == -1) {
468 			if (errno == EINTR)
469 				continue;
470 			fatal("poll error");
471 		}
472 
473 		/*
474 		 * If we previously saw fd exhaustion, we stop accept()
475 		 * for 1 second to throttle the accept() loop.
476 		 */
477 		if (pauseaccept && getmonotime() > pauseaccept + 1)
478 			pauseaccept = 0;
479 
480 		if (handle_pollfd(&pfd[PFD_PIPE_MAIN], ibuf_main) == -1) {
481 			log_warnx("SE: Lost connection to parent");
482 			session_quit = 1;
483 			continue;
484 		} else
485 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
486 			    &listener_cnt);
487 
488 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
489 			log_warnx("SE: Lost connection to RDE");
490 			imsgbuf_clear(ibuf_rde);
491 			free(ibuf_rde);
492 			ibuf_rde = NULL;
493 		} else
494 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
495 			    &listener_cnt);
496 
497 		if (handle_pollfd(&pfd[PFD_PIPE_ROUTE_CTL], ibuf_rde_ctl) ==
498 		    -1) {
499 			log_warnx("SE: Lost connection to RDE control");
500 			imsgbuf_clear(ibuf_rde_ctl);
501 			free(ibuf_rde_ctl);
502 			ibuf_rde_ctl = NULL;
503 		} else
504 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
505 			    &listener_cnt);
506 
507 		if (pfd[PFD_SOCK_CTL].revents & POLLIN)
508 			ctl_cnt += control_accept(csock, 0);
509 
510 		if (pfd[PFD_SOCK_RCTL].revents & POLLIN)
511 			ctl_cnt += control_accept(rcsock, 1);
512 
513 		for (j = PFD_LISTENERS_START; j < idx_listeners; j++)
514 			if (pfd[j].revents & POLLIN)
515 				session_accept(pfd[j].fd);
516 
517 		for (; j < idx_peers; j++)
518 			session_dispatch_msg(&pfd[j],
519 			    peer_l[j - idx_listeners]);
520 
521 		RB_FOREACH(p, peer_head, &conf->peers)
522 			session_process_msg(p);
523 
524 		for (; j < idx_mrts; j++)
525 			if (pfd[j].revents & POLLOUT)
526 				mrt_write(mrt_l[j - idx_peers]);
527 
528 		for (; j < i; j++)
529 			ctl_cnt -= control_dispatch_msg(&pfd[j], &conf->peers);
530 	}
531 
532 	RB_FOREACH_SAFE(p, peer_head, &conf->peers, next) {
533 		session_stop(p, ERR_CEASE_ADMIN_DOWN, "bgpd shutting down");
534 		timer_remove_all(&p->timers);
535 		tcp_md5_del_listener(conf, p);
536 		RB_REMOVE(peer_head, &conf->peers, p);
537 		free(p);
538 	}
539 
540 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
541 		mrt_clean(m);
542 		LIST_REMOVE(m, entry);
543 		free(m);
544 	}
545 
546 	free_config(conf);
547 	free(peer_l);
548 	free(mrt_l);
549 	free(pfd);
550 
551 	/* close pipes */
552 	if (ibuf_rde) {
553 		imsgbuf_write(ibuf_rde);
554 		imsgbuf_clear(ibuf_rde);
555 		close(ibuf_rde->fd);
556 		free(ibuf_rde);
557 	}
558 	if (ibuf_rde_ctl) {
559 		imsgbuf_clear(ibuf_rde_ctl);
560 		close(ibuf_rde_ctl->fd);
561 		free(ibuf_rde_ctl);
562 	}
563 	imsgbuf_write(ibuf_main);
564 	imsgbuf_clear(ibuf_main);
565 	close(ibuf_main->fd);
566 	free(ibuf_main);
567 
568 	control_shutdown(csock);
569 	control_shutdown(rcsock);
570 	log_info("session engine exiting");
571 	exit(0);
572 }
573 
574 void
init_peer(struct peer * p)575 init_peer(struct peer *p)
576 {
577 	TAILQ_INIT(&p->timers);
578 	p->fd = -1;
579 	if (p->wbuf != NULL)
580 		fatalx("%s: msgbuf already set", __func__);
581 	if ((p->wbuf = msgbuf_new_reader(MSGSIZE_HEADER, parse_header, p)) ==
582 	    NULL)
583 		fatal(NULL);
584 
585 	if (p->conf.if_depend[0])
586 		imsg_compose(ibuf_main, IMSG_SESSION_DEPENDON, 0, 0, -1,
587 		    p->conf.if_depend, sizeof(p->conf.if_depend));
588 	else
589 		p->depend_ok = 1;
590 
591 	peer_cnt++;
592 
593 	change_state(p, STATE_IDLE, EVNT_NONE);
594 	if (p->conf.down)
595 		timer_stop(&p->timers, Timer_IdleHold); /* no autostart */
596 	else
597 		timer_set(&p->timers, Timer_IdleHold, SESSION_CLEAR_DELAY);
598 
599 	p->stats.last_updown = getmonotime();
600 
601 	/*
602 	 * on startup, demote if requested.
603 	 * do not handle new peers. they must reach ESTABLISHED beforehand.
604 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
605 	 */
606 	if (p->reconf_action != RECONF_REINIT && p->conf.demote_group[0])
607 		session_demote(p, +1);
608 }
609 
610 void
bgp_fsm(struct peer * peer,enum session_events event,struct ibuf * msg)611 bgp_fsm(struct peer *peer, enum session_events event, struct ibuf *msg)
612 {
613 	switch (peer->state) {
614 	case STATE_NONE:
615 		/* nothing */
616 		break;
617 	case STATE_IDLE:
618 		switch (event) {
619 		case EVNT_START:
620 			timer_stop(&peer->timers, Timer_Hold);
621 			timer_stop(&peer->timers, Timer_SendHold);
622 			timer_stop(&peer->timers, Timer_Keepalive);
623 			timer_stop(&peer->timers, Timer_IdleHold);
624 
625 			if (!peer->depend_ok)
626 				timer_stop(&peer->timers, Timer_ConnectRetry);
627 			else if (peer->passive || peer->conf.passive ||
628 			    peer->conf.template) {
629 				change_state(peer, STATE_ACTIVE, event);
630 				timer_stop(&peer->timers, Timer_ConnectRetry);
631 			} else {
632 				change_state(peer, STATE_CONNECT, event);
633 				timer_set(&peer->timers, Timer_ConnectRetry,
634 				    conf->connectretry);
635 				session_connect(peer);
636 			}
637 			peer->passive = 0;
638 			break;
639 		case EVNT_STOP:
640 			timer_stop(&peer->timers, Timer_IdleHold);
641 			break;
642 		default:
643 			/* ignore */
644 			break;
645 		}
646 		break;
647 	case STATE_CONNECT:
648 		switch (event) {
649 		case EVNT_START:
650 			/* ignore */
651 			break;
652 		case EVNT_CON_OPEN:
653 			session_tcp_established(peer);
654 			session_open(peer);
655 			timer_stop(&peer->timers, Timer_ConnectRetry);
656 			peer->holdtime = INTERVAL_HOLD_INITIAL;
657 			start_timer_holdtime(peer);
658 			change_state(peer, STATE_OPENSENT, event);
659 			break;
660 		case EVNT_CON_OPENFAIL:
661 			timer_set(&peer->timers, Timer_ConnectRetry,
662 			    conf->connectretry);
663 			session_close_connection(peer);
664 			change_state(peer, STATE_ACTIVE, event);
665 			break;
666 		case EVNT_TIMER_CONNRETRY:
667 			timer_set(&peer->timers, Timer_ConnectRetry,
668 			    conf->connectretry);
669 			session_connect(peer);
670 			break;
671 		default:
672 			change_state(peer, STATE_IDLE, event);
673 			break;
674 		}
675 		break;
676 	case STATE_ACTIVE:
677 		switch (event) {
678 		case EVNT_START:
679 			/* ignore */
680 			break;
681 		case EVNT_CON_OPEN:
682 			session_tcp_established(peer);
683 			session_open(peer);
684 			timer_stop(&peer->timers, Timer_ConnectRetry);
685 			peer->holdtime = INTERVAL_HOLD_INITIAL;
686 			start_timer_holdtime(peer);
687 			change_state(peer, STATE_OPENSENT, event);
688 			break;
689 		case EVNT_CON_OPENFAIL:
690 			timer_set(&peer->timers, Timer_ConnectRetry,
691 			    conf->connectretry);
692 			session_close_connection(peer);
693 			change_state(peer, STATE_ACTIVE, event);
694 			break;
695 		case EVNT_TIMER_CONNRETRY:
696 			timer_set(&peer->timers, Timer_ConnectRetry,
697 			    peer->holdtime);
698 			change_state(peer, STATE_CONNECT, event);
699 			session_connect(peer);
700 			break;
701 		default:
702 			change_state(peer, STATE_IDLE, event);
703 			break;
704 		}
705 		break;
706 	case STATE_OPENSENT:
707 		switch (event) {
708 		case EVNT_START:
709 			/* ignore */
710 			break;
711 		case EVNT_STOP:
712 			change_state(peer, STATE_IDLE, event);
713 			break;
714 		case EVNT_CON_CLOSED:
715 			session_close_connection(peer);
716 			timer_set(&peer->timers, Timer_ConnectRetry,
717 			    conf->connectretry);
718 			change_state(peer, STATE_ACTIVE, event);
719 			break;
720 		case EVNT_CON_FATAL:
721 			change_state(peer, STATE_IDLE, event);
722 			break;
723 		case EVNT_TIMER_HOLDTIME:
724 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
725 			    0, NULL);
726 			change_state(peer, STATE_IDLE, event);
727 			break;
728 		case EVNT_TIMER_SENDHOLD:
729 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
730 			    0, NULL);
731 			change_state(peer, STATE_IDLE, event);
732 			break;
733 		case EVNT_RCVD_OPEN:
734 			/* parse_open calls change_state itself on failure */
735 			if (parse_open(peer, msg))
736 				break;
737 			session_keepalive(peer);
738 			change_state(peer, STATE_OPENCONFIRM, event);
739 			break;
740 		case EVNT_RCVD_NOTIFICATION:
741 			parse_notification(peer, msg);
742 			break;
743 		default:
744 			session_notification(peer,
745 			    ERR_FSM, ERR_FSM_UNEX_OPENSENT, NULL);
746 			change_state(peer, STATE_IDLE, event);
747 			break;
748 		}
749 		break;
750 	case STATE_OPENCONFIRM:
751 		switch (event) {
752 		case EVNT_START:
753 			/* ignore */
754 			break;
755 		case EVNT_STOP:
756 			change_state(peer, STATE_IDLE, event);
757 			break;
758 		case EVNT_CON_CLOSED:
759 		case EVNT_CON_FATAL:
760 			change_state(peer, STATE_IDLE, event);
761 			break;
762 		case EVNT_TIMER_HOLDTIME:
763 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
764 			    0, NULL);
765 			change_state(peer, STATE_IDLE, event);
766 			break;
767 		case EVNT_TIMER_SENDHOLD:
768 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
769 			    0, NULL);
770 			change_state(peer, STATE_IDLE, event);
771 			break;
772 		case EVNT_TIMER_KEEPALIVE:
773 			session_keepalive(peer);
774 			break;
775 		case EVNT_RCVD_KEEPALIVE:
776 			start_timer_holdtime(peer);
777 			change_state(peer, STATE_ESTABLISHED, event);
778 			break;
779 		case EVNT_RCVD_NOTIFICATION:
780 			parse_notification(peer, msg);
781 			break;
782 		default:
783 			session_notification(peer,
784 			    ERR_FSM, ERR_FSM_UNEX_OPENCONFIRM, NULL);
785 			change_state(peer, STATE_IDLE, event);
786 			break;
787 		}
788 		break;
789 	case STATE_ESTABLISHED:
790 		switch (event) {
791 		case EVNT_START:
792 			/* ignore */
793 			break;
794 		case EVNT_STOP:
795 			change_state(peer, STATE_IDLE, event);
796 			break;
797 		case EVNT_CON_CLOSED:
798 		case EVNT_CON_FATAL:
799 			change_state(peer, STATE_IDLE, event);
800 			break;
801 		case EVNT_TIMER_HOLDTIME:
802 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
803 			    0, NULL);
804 			change_state(peer, STATE_IDLE, event);
805 			break;
806 		case EVNT_TIMER_SENDHOLD:
807 			session_notification(peer, ERR_SENDHOLDTIMEREXPIRED,
808 			    0, NULL);
809 			change_state(peer, STATE_IDLE, event);
810 			break;
811 		case EVNT_TIMER_KEEPALIVE:
812 			session_keepalive(peer);
813 			break;
814 		case EVNT_RCVD_KEEPALIVE:
815 			start_timer_holdtime(peer);
816 			break;
817 		case EVNT_RCVD_UPDATE:
818 			start_timer_holdtime(peer);
819 			if (parse_update(peer, msg))
820 				change_state(peer, STATE_IDLE, event);
821 			else
822 				start_timer_holdtime(peer);
823 			break;
824 		case EVNT_RCVD_NOTIFICATION:
825 			parse_notification(peer, msg);
826 			break;
827 		default:
828 			session_notification(peer,
829 			    ERR_FSM, ERR_FSM_UNEX_ESTABLISHED, NULL);
830 			change_state(peer, STATE_IDLE, event);
831 			break;
832 		}
833 		break;
834 	}
835 }
836 
837 void
start_timer_holdtime(struct peer * peer)838 start_timer_holdtime(struct peer *peer)
839 {
840 	if (peer->holdtime > 0)
841 		timer_set(&peer->timers, Timer_Hold, peer->holdtime);
842 	else
843 		timer_stop(&peer->timers, Timer_Hold);
844 }
845 
846 void
start_timer_sendholdtime(struct peer * peer)847 start_timer_sendholdtime(struct peer *peer)
848 {
849 	uint16_t holdtime = INTERVAL_HOLD;
850 
851 	if (peer->holdtime > INTERVAL_HOLD)
852 		holdtime = peer->holdtime;
853 
854 	if (peer->holdtime > 0)
855 		timer_set(&peer->timers, Timer_SendHold, holdtime);
856 	else
857 		timer_stop(&peer->timers, Timer_SendHold);
858 }
859 
860 void
start_timer_keepalive(struct peer * peer)861 start_timer_keepalive(struct peer *peer)
862 {
863 	if (peer->holdtime > 0)
864 		timer_set(&peer->timers, Timer_Keepalive, peer->holdtime / 3);
865 	else
866 		timer_stop(&peer->timers, Timer_Keepalive);
867 }
868 
869 void
session_close_connection(struct peer * peer)870 session_close_connection(struct peer *peer)
871 {
872 	if (peer->fd != -1) {
873 		close(peer->fd);
874 		pauseaccept = 0;
875 	}
876 	peer->fd = -1;
877 }
878 
879 void
change_state(struct peer * peer,enum session_state state,enum session_events event)880 change_state(struct peer *peer, enum session_state state,
881     enum session_events event)
882 {
883 	struct mrt	*mrt;
884 
885 	switch (state) {
886 	case STATE_IDLE:
887 		/* carp demotion first. new peers handled in init_peer */
888 		if (peer->state == STATE_ESTABLISHED &&
889 		    peer->conf.demote_group[0] && !peer->demoted)
890 			session_demote(peer, +1);
891 
892 		/*
893 		 * try to write out what's buffered (maybe a notification),
894 		 * don't bother if it fails
895 		 */
896 		if (peer->state >= STATE_OPENSENT &&
897 		    msgbuf_queuelen(peer->wbuf) > 0)
898 			ibuf_write(peer->fd, peer->wbuf);
899 
900 		/*
901 		 * we must start the timer for the next EVNT_START
902 		 * if we are coming here due to an error and the
903 		 * session was not established successfully before, the
904 		 * starttimerinterval needs to be exponentially increased
905 		 */
906 		if (peer->IdleHoldTime == 0)
907 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
908 		peer->holdtime = INTERVAL_HOLD_INITIAL;
909 		timer_stop(&peer->timers, Timer_ConnectRetry);
910 		timer_stop(&peer->timers, Timer_Keepalive);
911 		timer_stop(&peer->timers, Timer_Hold);
912 		timer_stop(&peer->timers, Timer_SendHold);
913 		timer_stop(&peer->timers, Timer_IdleHold);
914 		timer_stop(&peer->timers, Timer_IdleHoldReset);
915 		session_close_connection(peer);
916 		msgbuf_clear(peer->wbuf);
917 		peer->rpending = 0;
918 		memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
919 		if (!peer->template)
920 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
921 			    peer->conf.id, 0, -1, NULL, 0);
922 
923 		if (peer->state == STATE_ESTABLISHED) {
924 			if (peer->capa.neg.grestart.restart == 2 &&
925 			    (event == EVNT_CON_CLOSED ||
926 			    event == EVNT_CON_FATAL ||
927 			    (peer->capa.neg.grestart.grnotification &&
928 			    (event == EVNT_RCVD_GRACE_NOTIFICATION ||
929 			    event == EVNT_TIMER_HOLDTIME ||
930 			    event == EVNT_TIMER_SENDHOLD)))) {
931 				/* don't punish graceful restart */
932 				timer_set(&peer->timers, Timer_IdleHold, 0);
933 				session_graceful_restart(peer);
934 			} else if (event != EVNT_STOP) {
935 				timer_set(&peer->timers, Timer_IdleHold,
936 				    peer->IdleHoldTime);
937 				if (event != EVNT_NONE &&
938 				    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
939 					peer->IdleHoldTime *= 2;
940 				session_down(peer);
941 			} else {
942 				session_down(peer);
943 			}
944 		} else if (event != EVNT_STOP) {
945 			timer_set(&peer->timers, Timer_IdleHold,
946 			    peer->IdleHoldTime);
947 			if (event != EVNT_NONE &&
948 			    peer->IdleHoldTime < MAX_IDLE_HOLD / 2)
949 				peer->IdleHoldTime *= 2;
950 		}
951 
952 		if (peer->state == STATE_NONE ||
953 		    peer->state == STATE_ESTABLISHED) {
954 			/* initialize capability negotiation structures */
955 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
956 			    sizeof(peer->capa.ann));
957 		}
958 		break;
959 	case STATE_CONNECT:
960 		if (peer->state == STATE_ESTABLISHED &&
961 		    peer->capa.neg.grestart.restart == 2) {
962 			/* do the graceful restart dance */
963 			session_graceful_restart(peer);
964 			peer->holdtime = INTERVAL_HOLD_INITIAL;
965 			timer_stop(&peer->timers, Timer_ConnectRetry);
966 			timer_stop(&peer->timers, Timer_Keepalive);
967 			timer_stop(&peer->timers, Timer_Hold);
968 			timer_stop(&peer->timers, Timer_SendHold);
969 			timer_stop(&peer->timers, Timer_IdleHold);
970 			timer_stop(&peer->timers, Timer_IdleHoldReset);
971 			session_close_connection(peer);
972 			msgbuf_clear(peer->wbuf);
973 			memset(&peer->capa.peer, 0, sizeof(peer->capa.peer));
974 		}
975 		break;
976 	case STATE_ACTIVE:
977 		if (!peer->template)
978 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
979 			    peer->conf.id, 0, -1, NULL, 0);
980 		break;
981 	case STATE_OPENSENT:
982 		break;
983 	case STATE_OPENCONFIRM:
984 		break;
985 	case STATE_ESTABLISHED:
986 		timer_set(&peer->timers, Timer_IdleHoldReset,
987 		    peer->IdleHoldTime);
988 		if (peer->demoted)
989 			timer_set(&peer->timers, Timer_CarpUndemote,
990 			    INTERVAL_HOLD_DEMOTED);
991 		session_up(peer);
992 		break;
993 	default:		/* something seriously fucked */
994 		break;
995 	}
996 
997 	log_statechange(peer, state, event);
998 	LIST_FOREACH(mrt, &mrthead, entry) {
999 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
1000 			continue;
1001 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1002 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1003 		    mrt->group_id == peer->conf.groupid))
1004 			mrt_dump_state(mrt, peer->state, state, peer);
1005 	}
1006 	peer->prev_state = peer->state;
1007 	peer->state = state;
1008 }
1009 
/*
 * Accept one pending connection on listenfd and offer it to the peer
 * that getpeerbyip() finds for the remote address.
 *
 * The new socket is closed again when no peer is found, when the peer is
 * in a state that is not handled below, when the peer already owns a
 * socket while not in STATE_CONNECT, or when the authentication check or
 * the socket setup fails.  On success the descriptor is stored in p->fd
 * and EVNT_CON_OPEN is raised.
 */
void
session_accept(int listenfd)
{
	int			 connfd;
	socklen_t		 len;
	struct sockaddr_storage	 cliaddr;
	struct peer		*p = NULL;

	len = sizeof(cliaddr);
	/*
	 * Running out of descriptors stores the current monotonic time in
	 * pauseaccept; EWOULDBLOCK, EINTR and ECONNABORTED are ignored
	 * silently, every other error is logged.
	 */
	if ((connfd = accept4(listenfd,
	    (struct sockaddr *)&cliaddr, &len,
	    SOCK_CLOEXEC | SOCK_NONBLOCK)) == -1) {
		if (errno == ENFILE || errno == EMFILE)
			pauseaccept = getmonotime();
		else if (errno != EWOULDBLOCK && errno != EINTR &&
		    errno != ECONNABORTED)
			log_warn("accept");
		return;
	}

	p = getpeerbyip(conf, (struct sockaddr *)&cliaddr);

	/*
	 * An idle peer with fewer than two errors whose IdleHold timer is
	 * still running is marked passive and started immediately.
	 * NOTE(review): presumably EVNT_START moves it to a state that the
	 * check below accepts - confirm against bgp_fsm().
	 */
	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
		if (timer_running(&p->timers, Timer_IdleHold, NULL)) {
			/* fast reconnect after clear */
			p->passive = 1;
			bgp_fsm(p, EVNT_START, NULL);
		}
	}

	if (p != NULL &&
	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
		/*
		 * The peer already has a socket: in STATE_CONNECT
		 * session_close_connection() is called on it and the new
		 * connection is used, otherwise the new one is dropped.
		 */
		if (p->fd != -1) {
			if (p->state == STATE_CONNECT)
				session_close_connection(p);
			else {
				close(connfd);
				return;
			}
		}

		/* also reached by goto from the graceful restart branch */
open:
		if (p->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) {
			log_peer_warnx(&p->conf,
			    "ipsec or md5sig configured but not available");
			close(connfd);
			return;
		}

		if (tcp_md5_check(connfd, &p->auth_conf) == -1) {
			log_peer_warn(&p->conf, "check md5sig");
			close(connfd);
			return;
		}
		p->fd = connfd;
		/*
		 * NOTE(review): on failure connfd is closed but p->fd still
		 * holds its number - confirm nothing uses the stale
		 * descriptor before the peer is reset.
		 */
		if (session_setup_socket(p)) {
			close(connfd);
			return;
		}
		bgp_fsm(p, EVNT_CON_OPEN, NULL);
		return;
	} else if (p != NULL && p->state == STATE_ESTABLISHED &&
	    p->capa.neg.grestart.restart == 2) {
		/* first do the graceful restart dance */
		change_state(p, STATE_CONNECT, EVNT_CON_CLOSED);
		/* then do part of the open dance */
		goto open;
	} else {
		/* unknown peer (p may be NULL) or unsuitable state */
		log_conn_attempt(p, (struct sockaddr *)&cliaddr, len);
		close(connfd);
	}
}
1082 
1083 int
session_connect(struct peer * peer)1084 session_connect(struct peer *peer)
1085 {
1086 	struct sockaddr		*sa;
1087 	struct bgpd_addr	*bind_addr;
1088 	socklen_t		 sa_len;
1089 
1090 	/*
1091 	 * we do not need the overcomplicated collision detection RFC 1771
1092 	 * describes; we simply make sure there is only ever one concurrent
1093 	 * tcp connection per peer.
1094 	 */
1095 	if (peer->fd != -1)
1096 		return (-1);
1097 
1098 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid),
1099 	    SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, IPPROTO_TCP)) == -1) {
1100 		log_peer_warn(&peer->conf, "session_connect socket");
1101 		bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1102 		return (-1);
1103 	}
1104 
1105 	if (peer->auth_conf.method != AUTH_NONE && sysdep.no_pfkey) {
1106 		log_peer_warnx(&peer->conf,
1107 		    "ipsec or md5sig configured but not available");
1108 		bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1109 		return (-1);
1110 	}
1111 
1112 	if (tcp_md5_set(peer->fd, &peer->auth_conf,
1113 	    &peer->conf.remote_addr) == -1)
1114 		log_peer_warn(&peer->conf, "setting md5sig");
1115 
1116 	/* if local-address is set we need to bind() */
1117 	bind_addr = session_localaddr(peer);
1118 	if ((sa = addr2sa(bind_addr, 0, &sa_len)) != NULL) {
1119 		if (bind(peer->fd, sa, sa_len) == -1) {
1120 			log_peer_warn(&peer->conf, "session_connect bind");
1121 			bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1122 			return (-1);
1123 		}
1124 	}
1125 
1126 	if (session_setup_socket(peer)) {
1127 		bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1128 		return (-1);
1129 	}
1130 
1131 	sa = addr2sa(&peer->conf.remote_addr, peer->conf.remote_port, &sa_len);
1132 	if (connect(peer->fd, sa, sa_len) == -1) {
1133 		if (errno != EINPROGRESS) {
1134 			if (errno != peer->lasterr)
1135 				log_peer_warn(&peer->conf, "connect");
1136 			peer->lasterr = errno;
1137 			bgp_fsm(peer, EVNT_CON_OPENFAIL, NULL);
1138 			return (-1);
1139 		}
1140 	} else
1141 		bgp_fsm(peer, EVNT_CON_OPEN, NULL);
1142 
1143 	return (0);
1144 }
1145 
1146 int
session_setup_socket(struct peer * p)1147 session_setup_socket(struct peer *p)
1148 {
1149 	int	ttl = p->conf.distance;
1150 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1151 	int	nodelay = 1;
1152 	int	bsize;
1153 
1154 	switch (p->conf.remote_addr.aid) {
1155 	case AID_INET:
1156 		/* set precedence, see RFC 1771 appendix 5 */
1157 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1158 		    -1) {
1159 			log_peer_warn(&p->conf,
1160 			    "session_setup_socket setsockopt TOS");
1161 			return (-1);
1162 		}
1163 
1164 		if (p->conf.ebgp) {
1165 			/*
1166 			 * set TTL to foreign router's distance
1167 			 * 1=direct n=multihop with ttlsec, we always use 255
1168 			 */
1169 			if (p->conf.ttlsec) {
1170 				ttl = 256 - p->conf.distance;
1171 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1172 				    &ttl, sizeof(ttl)) == -1) {
1173 					log_peer_warn(&p->conf,
1174 					    "session_setup_socket: "
1175 					    "setsockopt MINTTL");
1176 					return (-1);
1177 				}
1178 				ttl = 255;
1179 			}
1180 
1181 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1182 			    sizeof(ttl)) == -1) {
1183 				log_peer_warn(&p->conf,
1184 				    "session_setup_socket setsockopt TTL");
1185 				return (-1);
1186 			}
1187 		}
1188 		break;
1189 	case AID_INET6:
1190 		if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_TCLASS, &pre,
1191 		    sizeof(pre)) == -1) {
1192 			log_peer_warn(&p->conf, "session_setup_socket "
1193 			    "setsockopt TCLASS");
1194 			return (-1);
1195 		}
1196 
1197 		if (p->conf.ebgp) {
1198 			/*
1199 			 * set hoplimit to foreign router's distance
1200 			 * 1=direct n=multihop with ttlsec, we always use 255
1201 			 */
1202 			if (p->conf.ttlsec) {
1203 				ttl = 256 - p->conf.distance;
1204 				if (setsockopt(p->fd, IPPROTO_IPV6,
1205 				    IPV6_MINHOPCOUNT, &ttl, sizeof(ttl))
1206 				    == -1) {
1207 					log_peer_warn(&p->conf,
1208 					    "session_setup_socket: "
1209 					    "setsockopt MINHOPCOUNT");
1210 					return (-1);
1211 				}
1212 				ttl = 255;
1213 			}
1214 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1215 			    &ttl, sizeof(ttl)) == -1) {
1216 				log_peer_warn(&p->conf,
1217 				    "session_setup_socket setsockopt hoplimit");
1218 				return (-1);
1219 			}
1220 		}
1221 		break;
1222 	}
1223 
1224 	/* set TCP_NODELAY */
1225 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1226 	    sizeof(nodelay)) == -1) {
1227 		log_peer_warn(&p->conf,
1228 		    "session_setup_socket setsockopt TCP_NODELAY");
1229 		return (-1);
1230 	}
1231 
1232 	/* limit bufsize. no biggie if it fails */
1233 	bsize = 65535;
1234 	setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize, sizeof(bsize));
1235 	setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize, sizeof(bsize));
1236 
1237 	return (0);
1238 }
1239 
1240 /*
1241  * compare the bgpd_addr with the sockaddr by converting the latter into
1242  * a bgpd_addr. Return true if the two are equal, including any scope
1243  */
1244 static int
sa_equal(struct bgpd_addr * ba,struct sockaddr * b)1245 sa_equal(struct bgpd_addr *ba, struct sockaddr *b)
1246 {
1247 	struct bgpd_addr bb;
1248 
1249 	sa2addr(b, &bb, NULL);
1250 	return (memcmp(ba, &bb, sizeof(*ba)) == 0);
1251 }
1252 
1253 static void
get_alternate_addr(struct bgpd_addr * local,struct bgpd_addr * remote,struct bgpd_addr * alt,unsigned int * scope)1254 get_alternate_addr(struct bgpd_addr *local, struct bgpd_addr *remote,
1255     struct bgpd_addr *alt, unsigned int *scope)
1256 {
1257 	struct ifaddrs	*ifap, *ifa, *match;
1258 	int connected = 0;
1259 	u_int8_t plen;
1260 
1261 	if (getifaddrs(&ifap) == -1)
1262 		fatal("getifaddrs");
1263 
1264 	for (match = ifap; match != NULL; match = match->ifa_next) {
1265 		if (match->ifa_addr == NULL)
1266 			continue;
1267 		if (match->ifa_addr->sa_family != AF_INET &&
1268 		    match->ifa_addr->sa_family != AF_INET6)
1269 			continue;
1270 		if (sa_equal(local, match->ifa_addr)) {
1271 			if (remote->aid == AID_INET6 &&
1272 			    IN6_IS_ADDR_LINKLOCAL(&remote->v6)) {
1273 				/* IPv6 LLA are by definition connected */
1274 				connected = 1;
1275 			} else if (match->ifa_flags & IFF_POINTOPOINT &&
1276 			    match->ifa_dstaddr != NULL) {
1277 				if (sa_equal(remote, match->ifa_dstaddr))
1278 					connected = 1;
1279 			} else if (match->ifa_netmask != NULL) {
1280 				plen = mask2prefixlen(
1281 				    match->ifa_addr->sa_family,
1282 				    match->ifa_netmask);
1283 				if (prefix_compare(local, remote, plen) == 0)
1284 					connected = 1;
1285 			}
1286 			break;
1287 		}
1288 	}
1289 
1290 	if (match == NULL) {
1291 		log_warnx("%s: local address not found", __func__);
1292 		return;
1293 	}
1294 	if (connected)
1295 		*scope = if_nametoindex(match->ifa_name);
1296 	else
1297 		*scope = 0;
1298 
1299 	switch (local->aid) {
1300 	case AID_INET6:
1301 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1302 			if (ifa->ifa_addr != NULL &&
1303 			    ifa->ifa_addr->sa_family == AF_INET &&
1304 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1305 				sa2addr(ifa->ifa_addr, alt, NULL);
1306 				break;
1307 			}
1308 		}
1309 		break;
1310 	case AID_INET:
1311 		for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
1312 			if (ifa->ifa_addr != NULL &&
1313 			    ifa->ifa_addr->sa_family == AF_INET6 &&
1314 			    strcmp(ifa->ifa_name, match->ifa_name) == 0) {
1315 				struct sockaddr_in6 *s =
1316 				    (struct sockaddr_in6 *)ifa->ifa_addr;
1317 
1318 				/* only accept global scope addresses */
1319 				if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
1320 				    IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))
1321 					continue;
1322 				sa2addr(ifa->ifa_addr, alt, NULL);
1323 				break;
1324 			}
1325 		}
1326 		break;
1327 	default:
1328 		log_warnx("%s: unsupported address family %s", __func__,
1329 		    aid2str(local->aid));
1330 		break;
1331 	}
1332 
1333 	freeifaddrs(ifap);
1334 }
1335 
1336 void
session_tcp_established(struct peer * peer)1337 session_tcp_established(struct peer *peer)
1338 {
1339 	struct sockaddr_storage	ss;
1340 	socklen_t		len;
1341 
1342 	len = sizeof(ss);
1343 	if (getsockname(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1344 		log_warn("getsockname");
1345 	sa2addr((struct sockaddr *)&ss, &peer->local, &peer->local_port);
1346 	len = sizeof(ss);
1347 	if (getpeername(peer->fd, (struct sockaddr *)&ss, &len) == -1)
1348 		log_warn("getpeername");
1349 	sa2addr((struct sockaddr *)&ss, &peer->remote, &peer->remote_port);
1350 
1351 	get_alternate_addr(&peer->local, &peer->remote, &peer->local_alt,
1352 	    &peer->if_scope);
1353 }
1354 
/*
 * Append a capability header, i.e. the code byte followed by the length
 * byte, to opb.  Returns the sum of the two ibuf_add_n8() results, which
 * is 0 when both calls returned 0.
 */
int
session_capa_add(struct ibuf *opb, uint8_t capa_code, uint8_t capa_len)
{
	int rv;

	rv = ibuf_add_n8(opb, capa_code);
	rv += ibuf_add_n8(opb, capa_len);
	return (rv);
}
1364 
/*
 * Append the 4 byte body of a multiprotocol capability for aid to buf:
 * 16 bit AFI, one zero byte, 8 bit SAFI.
 * Returns -1 (after logging) when aid has no AFI/SAFI mapping, otherwise
 * the sum of the ibuf call results (0 when all of them returned 0).
 */
static int
session_capa_add_mp(struct ibuf *buf, uint8_t aid)
{
	uint16_t	afi;
	uint8_t		safi;
	int		rv;

	if (aid2afi(aid, &afi, &safi) == -1) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	rv = ibuf_add_n16(buf, afi);
	rv += ibuf_add_zero(buf, 1);
	rv += ibuf_add_n8(buf, safi);

	return (rv);
}
1383 
/*
 * Append one 4 byte AFI/SAFI/flags tuple for aid to b: 16 bit AFI, 8 bit
 * SAFI and the 8 bit flags value.
 * Returns -1 (after logging) when aid2afi() reports a failure, otherwise
 * the sum of the ibuf call results (0 when all of them returned 0).
 */
static int
session_capa_add_afi(struct ibuf *b, uint8_t aid, uint8_t flags)
{
	uint16_t	afi;
	uint8_t		safi;
	int		rv;

	if (aid2afi(aid, &afi, &safi) != 0) {
		log_warn("%s: bad AID", __func__);
		return (-1);
	}

	rv = ibuf_add_n16(b, afi);
	rv += ibuf_add_n8(b, safi);
	rv += ibuf_add_n8(b, flags);

	return (rv);
}
1402 
1403 static int
session_capa_add_ext_nh(struct ibuf * b,uint8_t aid)1404 session_capa_add_ext_nh(struct ibuf *b, uint8_t aid)
1405 {
1406 	int		errs = 0;
1407 	uint16_t	afi;
1408 	uint8_t		safi;
1409 
1410 	if (aid2afi(aid, &afi, &safi)) {
1411 		log_warn("%s: bad AID", __func__);
1412 		return (-1);
1413 	}
1414 
1415 	errs += ibuf_add_n16(b, afi);
1416 	errs += ibuf_add_n16(b, safi);
1417 	errs += ibuf_add_n16(b, AFI_IPv6);
1418 
1419 	return (errs);
1420 }
1421 
1422 struct ibuf *
session_newmsg(enum msg_type msgtype,uint16_t len)1423 session_newmsg(enum msg_type msgtype, uint16_t len)
1424 {
1425 	struct ibuf		*buf;
1426 	int			 errs = 0;
1427 
1428 	if ((buf = ibuf_open(len)) == NULL)
1429 		return (NULL);
1430 
1431 	errs += ibuf_add(buf, marker, sizeof(marker));
1432 	errs += ibuf_add_n16(buf, len);
1433 	errs += ibuf_add_n8(buf, msgtype);
1434 
1435 	if (errs) {
1436 		ibuf_free(buf);
1437 		return (NULL);
1438 	}
1439 
1440 	return (buf);
1441 }
1442 
1443 void
session_sendmsg(struct ibuf * msg,struct peer * p,enum msg_type msgtype)1444 session_sendmsg(struct ibuf *msg, struct peer *p, enum msg_type msgtype)
1445 {
1446 	struct mrt		*mrt;
1447 
1448 	LIST_FOREACH(mrt, &mrthead, entry) {
1449 		if (!(mrt->type == MRT_ALL_OUT || (msgtype == BGP_UPDATE &&
1450 		    mrt->type == MRT_UPDATE_OUT)))
1451 			continue;
1452 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1453 		    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
1454 		    mrt->group_id == p->conf.groupid))
1455 			mrt_dump_bgp_msg(mrt, msg, p, msgtype);
1456 	}
1457 
1458 	ibuf_close(p->wbuf, msg);
1459 	if (!p->throttled && msgbuf_queuelen(p->wbuf) > SESS_MSG_HIGH_MARK) {
1460 		if (imsg_rde(IMSG_XOFF, p->conf.id, NULL, 0) == -1)
1461 			log_peer_warn(&p->conf, "imsg_compose XOFF");
1462 		else
1463 			p->throttled = 1;
1464 	}
1465 }
1466 
1467 /*
1468  * Translate between internal roles and the value expected by RFC 9234.
1469  */
1470 static uint8_t
role2capa(enum role role)1471 role2capa(enum role role)
1472 {
1473 	switch (role) {
1474 	case ROLE_CUSTOMER:
1475 		return CAPA_ROLE_CUSTOMER;
1476 	case ROLE_PROVIDER:
1477 		return CAPA_ROLE_PROVIDER;
1478 	case ROLE_RS:
1479 		return CAPA_ROLE_RS;
1480 	case ROLE_RS_CLIENT:
1481 		return CAPA_ROLE_RS_CLIENT;
1482 	case ROLE_PEER:
1483 		return CAPA_ROLE_PEER;
1484 	default:
1485 		fatalx("Unsupported role for role capability");
1486 	}
1487 }
1488 
1489 static enum role
capa2role(uint8_t val)1490 capa2role(uint8_t val)
1491 {
1492 	switch (val) {
1493 	case CAPA_ROLE_PROVIDER:
1494 		return ROLE_PROVIDER;
1495 	case CAPA_ROLE_RS:
1496 		return ROLE_RS;
1497 	case CAPA_ROLE_RS_CLIENT:
1498 		return ROLE_RS_CLIENT;
1499 	case CAPA_ROLE_CUSTOMER:
1500 		return ROLE_CUSTOMER;
1501 	case CAPA_ROLE_PEER:
1502 		return ROLE_PEER;
1503 	default:
1504 		return ROLE_NONE;
1505 	}
1506 }
1507 
/*
 * Build the OPEN message for peer p and queue it for sending.
 *
 * The capabilities selected in p->capa.ann are first collected in a
 * scratch buffer (opb).  Its final size decides whether the optional
 * parameters are written with the regular one byte length or with the
 * RFC 9072 extended length encoding.  Any allocation or encoding error
 * raises EVNT_CON_FATAL on the peer and nothing is sent.  On success the
 * msg_sent_open counter is incremented.
 */
void
session_open(struct peer *p)
{
	struct ibuf		*buf, *opb;
	size_t			 len, optparamlen;
	uint16_t		 holdtime;
	uint8_t			 i;
	int			 errs = 0, extlen = 0;
	int			 mpcapa = 0;


	if ((opb = ibuf_dynamic(0, MAX_PKTSIZE - MSGSIZE_OPEN_MIN - 6)) ==
	    NULL) {
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/* multiprotocol extensions, RFC 4760 */
	/* mpcapa counts the announced families; it is reused further down */
	for (i = AID_MIN; i < AID_MAX; i++)
		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
			errs += session_capa_add(opb, CAPA_MP, 4);
			errs += session_capa_add_mp(opb, i);
			mpcapa++;
		}

	/* route refresh, RFC 2918 */
	if (p->capa.ann.refresh)	/* no data */
		errs += session_capa_add(opb, CAPA_REFRESH, 0);

	/* extended nexthop encoding, RFC 8950 */
	if (p->capa.ann.ext_nh[AID_INET]) {
		uint8_t enhlen = 0;

		/* one 6 byte tuple per announced family, length goes first */
		if (p->capa.ann.mp[AID_INET])
			enhlen += 6;
		if (p->capa.ann.mp[AID_VPN_IPv4])
			enhlen += 6;
		errs += session_capa_add(opb, CAPA_EXT_NEXTHOP, enhlen);
		if (p->capa.ann.mp[AID_INET])
			errs += session_capa_add_ext_nh(opb, AID_INET);
		if (p->capa.ann.mp[AID_VPN_IPv4])
			errs += session_capa_add_ext_nh(opb, AID_VPN_IPv4);
	}

	/* extended message support, RFC 8654 */
	if (p->capa.ann.ext_msg)	/* no data */
		errs += session_capa_add(opb, CAPA_EXT_MSG, 0);

	/* BGP open policy, RFC 9234, only for ebgp sessions */
	/* also requires IPv4 or IPv6 to be announced, or no MP capa at all */
	if (p->conf.ebgp && p->capa.ann.policy &&
	    p->conf.role != ROLE_NONE &&
	    (p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
	    mpcapa == 0)) {
		errs += session_capa_add(opb, CAPA_ROLE, 1);
		errs += ibuf_add_n8(opb, role2capa(p->conf.role));
	}

	/* graceful restart and End-of-RIB marker, RFC 4724 */
	if (p->capa.ann.grestart.restart) {
		int		rst = 0;
		uint16_t	hdr = 0;

		/* count the families that are currently restarting */
		for (i = AID_MIN; i < AID_MAX; i++) {
			if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING)
				rst++;
		}

		/* Only set the R-flag if no graceful restart is ongoing */
		if (!rst)
			hdr |= CAPA_GR_R_FLAG;
		if (p->capa.ann.grestart.grnotification)
			hdr |= CAPA_GR_N_FLAG;
		/* only the 2 byte header is sent, no per family entries */
		errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
		errs += ibuf_add_n16(opb, hdr);
	}

	/* 4-bytes AS numbers, RFC6793 */
	if (p->capa.ann.as4byte) {	/* 4 bytes data */
		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
		errs += ibuf_add_n32(opb, p->conf.local_as);
	}

	/* advertisement of multiple paths, RFC7911 */
	if (p->capa.ann.add_path[AID_MIN]) {	/* variable */
		uint8_t	aplen;

		/* 4 bytes per announced MP family, or a single AID_INET one */
		if (mpcapa)
			aplen = 4 * mpcapa;
		else	/* AID_INET */
			aplen = 4;
		errs += session_capa_add(opb, CAPA_ADD_PATH, aplen);
		if (mpcapa) {
			for (i = AID_MIN; i < AID_MAX; i++) {
				if (p->capa.ann.mp[i]) {
					errs += session_capa_add_afi(opb,
					    i, p->capa.ann.add_path[i] &
					    CAPA_AP_MASK);
				}
			}
		} else {	/* AID_INET */
			errs += session_capa_add_afi(opb, AID_INET,
			    p->capa.ann.add_path[AID_INET] & CAPA_AP_MASK);
		}
	}

	/* enhanced route-refresh, RFC7313 */
	if (p->capa.ann.enhanced_rr)	/* no data */
		errs += session_capa_add(opb, CAPA_ENHANCED_RR, 0);

	/* any failed capability write aborts the OPEN */
	if (errs) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/*
	 * Work out the total message length and the value of the one byte
	 * optional parameter length field.
	 */
	optparamlen = ibuf_size(opb);
	len = MSGSIZE_OPEN_MIN + optparamlen;
	if (optparamlen == 0) {
		/* nothing */
	} else if (optparamlen + 2 >= 255) {
		/* RFC9072: use 255 as magic size and request extra header */
		optparamlen = 255;
		extlen = 1;
		/* 3 byte OPT_PARAM_EXT_LEN and OPT_PARAM_CAPABILITIES */
		len += 2 * 3;
	} else {
		/* regular capabilities header */
		optparamlen += 2;
		len += 2;
	}

	if ((buf = session_newmsg(BGP_OPEN, len)) == NULL) {
		ibuf_free(opb);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	/* a per peer holdtime takes precedence over the global one */
	if (p->conf.holdtime)
		holdtime = p->conf.holdtime;
	else
		holdtime = conf->holdtime;

	/* fixed part: 4, short AS, holdtime, bgpid, opt. parameter length */
	errs += ibuf_add_n8(buf, 4);
	errs += ibuf_add_n16(buf, p->conf.local_short_as);
	errs += ibuf_add_n16(buf, holdtime);
	/*
	 * is already in network byte order
	 * NOTE(review): the _n32 helper is used like for the host order
	 * values above, which does not fit the sentence before - confirm
	 * the byte order conf->bgpid is stored in.
	 */
	errs += ibuf_add_n32(buf, conf->bgpid);
	errs += ibuf_add_n8(buf, optparamlen);

	if (extlen) {
		/* RFC9072 extra header which spans over the capabilities hdr */
		/* its length covers the 1 + 2 byte capabilities header too */
		errs += ibuf_add_n8(buf, OPT_PARAM_EXT_LEN);
		errs += ibuf_add_n16(buf, ibuf_size(opb) + 1 + 2);
	}

	if (optparamlen) {
		errs += ibuf_add_n8(buf, OPT_PARAM_CAPABILITIES);

		if (extlen) {
			/* RFC9072: 2-byte extended length */
			errs += ibuf_add_n16(buf, ibuf_size(opb));
		} else {
			errs += ibuf_add_n8(buf, ibuf_size(opb));
		}
		errs += ibuf_add_ibuf(buf, opb);
	}

	/* the scratch buffer is no longer needed, success or not */
	ibuf_free(opb);

	if (errs) {
		ibuf_free(buf);
		bgp_fsm(p, EVNT_CON_FATAL, NULL);
		return;
	}

	session_sendmsg(buf, p, BGP_OPEN);
	p->stats.msg_sent_open++;
}
1686 
1687 void
session_keepalive(struct peer * p)1688 session_keepalive(struct peer *p)
1689 {
1690 	struct ibuf		*buf;
1691 
1692 	if ((buf = session_newmsg(BGP_KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL) {
1693 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1694 		return;
1695 	}
1696 
1697 	session_sendmsg(buf, p, BGP_KEEPALIVE);
1698 	start_timer_keepalive(p);
1699 	p->stats.msg_sent_keepalive++;
1700 }
1701 
1702 void
session_update(uint32_t peerid,struct ibuf * ibuf)1703 session_update(uint32_t peerid, struct ibuf *ibuf)
1704 {
1705 	struct peer	*p;
1706 	struct ibuf	*buf;
1707 	size_t		 len, maxsize = MAX_PKTSIZE;
1708 
1709 	if ((p = getpeerbyid(conf, peerid)) == NULL) {
1710 		log_warnx("%s: no such peer: id=%u", __func__, peerid);
1711 		return;
1712 	}
1713 
1714 	if (p->state != STATE_ESTABLISHED)
1715 		return;
1716 
1717 	if (p->capa.neg.ext_msg)
1718 		maxsize = MAX_EXT_PKTSIZE;
1719 	len = ibuf_size(ibuf);
1720 	if (len < MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER ||
1721 	    len > maxsize - MSGSIZE_HEADER) {
1722 		log_peer_warnx(&p->conf, "bad UDPATE from RDE");
1723 		return;
1724 	}
1725 
1726 	if ((buf = session_newmsg(BGP_UPDATE, MSGSIZE_HEADER + len)) == NULL) {
1727 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1728 		return;
1729 	}
1730 
1731 	if (ibuf_add_ibuf(buf, ibuf)) {
1732 		ibuf_free(buf);
1733 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1734 		return;
1735 	}
1736 
1737 	session_sendmsg(buf, p, BGP_UPDATE);
1738 	start_timer_keepalive(p);
1739 	p->stats.msg_sent_update++;
1740 }
1741 
1742 /* Return 1 if a hard reset should be issued, 0 for a graceful notification */
1743 static int
session_req_hard_reset(enum err_codes errcode,uint8_t subcode)1744 session_req_hard_reset(enum err_codes errcode, uint8_t subcode)
1745 {
1746 	switch (errcode) {
1747 	case ERR_HEADER:
1748 	case ERR_OPEN:
1749 	case ERR_UPDATE:
1750 	case ERR_FSM:
1751 	case ERR_RREFRESH:
1752 		/*
1753 		 * Protocol errors trigger a hard reset. The peer
1754 		 * is not trustworthy and so there is no realistic
1755 		 * hope that forwarding can continue.
1756 		 */
1757 		break;
1758 	case ERR_HOLDTIMEREXPIRED:
1759 	case ERR_SENDHOLDTIMEREXPIRED:
1760 		/* Keep forwarding and hope the other side is back soon. */
1761 		return 0;
1762 	case ERR_CEASE:
1763 		switch (subcode) {
1764 		case ERR_CEASE_CONN_REJECT:
1765 		case ERR_CEASE_OTHER_CHANGE:
1766 		case ERR_CEASE_COLLISION:
1767 		case ERR_CEASE_RSRC_EXHAUST:
1768 			/* Per RFC8538 suggestion make these graceful. */
1769 			return 0;
1770 		}
1771 		break;
1772 	}
1773 	return 1;
1774 }
1775 
1776 void
session_notification_data(struct peer * p,uint8_t errcode,uint8_t subcode,void * data,size_t datalen)1777 session_notification_data(struct peer *p, uint8_t errcode, uint8_t subcode,
1778     void *data, size_t datalen)
1779 {
1780 	struct ibuf ibuf;
1781 
1782 	ibuf_from_buffer(&ibuf, data, datalen);
1783 	session_notification(p, errcode, subcode, &ibuf);
1784 }
1785 
1786 void
session_notification(struct peer * p,uint8_t errcode,uint8_t subcode,struct ibuf * ibuf)1787 session_notification(struct peer *p, uint8_t errcode, uint8_t subcode,
1788     struct ibuf *ibuf)
1789 {
1790 	struct ibuf		*buf;
1791 	const char		*reason = "sending";
1792 	int			 errs = 0, need_hard_reset = 0;
1793 	size_t			 datalen = 0;
1794 
1795 	switch (p->state) {
1796 	case STATE_OPENSENT:
1797 	case STATE_OPENCONFIRM:
1798 	case STATE_ESTABLISHED:
1799 		break;
1800 	default:
1801 		/* session not open, no need to send notification */
1802 		log_notification(p, errcode, subcode, ibuf, "dropping");
1803 		return;
1804 	}
1805 
1806 	if (p->capa.neg.grestart.grnotification) {
1807 		if (session_req_hard_reset(errcode, subcode)) {
1808 			need_hard_reset = 1;
1809 			datalen += 2;
1810 			reason = "sending hard-reset";
1811 		} else {
1812 			reason = "sending graceful";
1813 		}
1814 	}
1815 
1816 	log_notification(p, errcode, subcode, ibuf, reason);
1817 
1818 	/* cap to maximum size */
1819 	if (ibuf != NULL) {
1820 		if (ibuf_size(ibuf) >
1821 		    MAX_PKTSIZE - MSGSIZE_NOTIFICATION_MIN - datalen) {
1822 			log_peer_warnx(&p->conf,
1823 			    "oversized notification, data trunkated");
1824 			ibuf_truncate(ibuf, MAX_PKTSIZE -
1825 			    MSGSIZE_NOTIFICATION_MIN - datalen);
1826 		}
1827 		datalen += ibuf_size(ibuf);
1828 	}
1829 
1830 	if ((buf = session_newmsg(BGP_NOTIFICATION,
1831 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1832 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1833 		return;
1834 	}
1835 
1836 	if (need_hard_reset) {
1837 		errs += ibuf_add_n8(buf, ERR_CEASE);
1838 		errs += ibuf_add_n8(buf, ERR_CEASE_HARD_RESET);
1839 	}
1840 
1841 	errs += ibuf_add_n8(buf, errcode);
1842 	errs += ibuf_add_n8(buf, subcode);
1843 
1844 	if (ibuf != NULL)
1845 		errs += ibuf_add_ibuf(buf, ibuf);
1846 
1847 	if (errs) {
1848 		ibuf_free(buf);
1849 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1850 		return;
1851 	}
1852 
1853 	session_sendmsg(buf, p, BGP_NOTIFICATION);
1854 	p->stats.msg_sent_notification++;
1855 	p->stats.last_sent_errcode = errcode;
1856 	p->stats.last_sent_suberr = subcode;
1857 }
1858 
1859 int
session_neighbor_rrefresh(struct peer * p)1860 session_neighbor_rrefresh(struct peer *p)
1861 {
1862 	uint8_t	i;
1863 
1864 	if (!(p->capa.neg.refresh || p->capa.neg.enhanced_rr))
1865 		return (-1);
1866 
1867 	for (i = AID_MIN; i < AID_MAX; i++) {
1868 		if (p->capa.neg.mp[i] != 0)
1869 			session_rrefresh(p, i, ROUTE_REFRESH_REQUEST);
1870 	}
1871 
1872 	return (0);
1873 }
1874 
1875 void
session_rrefresh(struct peer * p,uint8_t aid,uint8_t subtype)1876 session_rrefresh(struct peer *p, uint8_t aid, uint8_t subtype)
1877 {
1878 	struct ibuf		*buf;
1879 	int			 errs = 0;
1880 	uint16_t		 afi;
1881 	uint8_t			 safi;
1882 
1883 	switch (subtype) {
1884 	case ROUTE_REFRESH_REQUEST:
1885 		p->stats.refresh_sent_req++;
1886 		break;
1887 	case ROUTE_REFRESH_BEGIN_RR:
1888 	case ROUTE_REFRESH_END_RR:
1889 		/* requires enhanced route refresh */
1890 		if (!p->capa.neg.enhanced_rr)
1891 			return;
1892 		if (subtype == ROUTE_REFRESH_BEGIN_RR)
1893 			p->stats.refresh_sent_borr++;
1894 		else
1895 			p->stats.refresh_sent_eorr++;
1896 		break;
1897 	default:
1898 		fatalx("session_rrefresh: bad subtype %d", subtype);
1899 	}
1900 
1901 	if (aid2afi(aid, &afi, &safi) == -1)
1902 		fatalx("session_rrefresh: bad afi/safi pair");
1903 
1904 	if ((buf = session_newmsg(BGP_RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1905 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1906 		return;
1907 	}
1908 
1909 	errs += ibuf_add_n16(buf, afi);
1910 	errs += ibuf_add_n8(buf, subtype);
1911 	errs += ibuf_add_n8(buf, safi);
1912 
1913 	if (errs) {
1914 		ibuf_free(buf);
1915 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
1916 		return;
1917 	}
1918 
1919 	session_sendmsg(buf, p, BGP_RREFRESH);
1920 	p->stats.msg_sent_rrefresh++;
1921 }
1922 
1923 int
session_graceful_restart(struct peer * p)1924 session_graceful_restart(struct peer *p)
1925 {
1926 	uint8_t	i;
1927 	uint16_t staletime = conf->staletime;
1928 
1929 	if (p->conf.staletime)
1930 		staletime = p->conf.staletime;
1931 
1932 	/* RFC 8538: enforce configurable upper bound of the stale timer */
1933 	if (staletime > p->capa.neg.grestart.timeout)
1934 		staletime = p->capa.neg.grestart.timeout;
1935 	timer_set(&p->timers, Timer_RestartTimeout, staletime);
1936 
1937 	for (i = AID_MIN; i < AID_MAX; i++) {
1938 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_PRESENT) {
1939 			if (imsg_rde(IMSG_SESSION_STALE, p->conf.id,
1940 			    &i, sizeof(i)) == -1)
1941 				return (-1);
1942 			log_peer_warnx(&p->conf,
1943 			    "graceful restart of %s, keeping routes",
1944 			    aid2str(i));
1945 			p->capa.neg.grestart.flags[i] |= CAPA_GR_RESTARTING;
1946 		} else if (p->capa.neg.mp[i]) {
1947 			if (imsg_rde(IMSG_SESSION_NOGRACE, p->conf.id,
1948 			    &i, sizeof(i)) == -1)
1949 				return (-1);
1950 			log_peer_warnx(&p->conf,
1951 			    "graceful restart of %s, flushing routes",
1952 			    aid2str(i));
1953 		}
1954 	}
1955 	return (0);
1956 }
1957 
1958 int
session_graceful_stop(struct peer * p)1959 session_graceful_stop(struct peer *p)
1960 {
1961 	uint8_t	i;
1962 
1963 	for (i = AID_MIN; i < AID_MAX; i++) {
1964 		/*
1965 		 * Only flush if the peer is restarting and the timeout fired.
1966 		 * In all other cases the session was already flushed when the
1967 		 * session went down or when the new open message was parsed.
1968 		 */
1969 		if (p->capa.neg.grestart.flags[i] & CAPA_GR_RESTARTING) {
1970 			log_peer_warnx(&p->conf, "graceful restart of %s, "
1971 			    "time-out, flushing", aid2str(i));
1972 			if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
1973 			    &i, sizeof(i)) == -1)
1974 				return (-1);
1975 		}
1976 		p->capa.neg.grestart.flags[i] &= ~CAPA_GR_RESTARTING;
1977 	}
1978 	return (0);
1979 }
1980 
1981 int
session_dispatch_msg(struct pollfd * pfd,struct peer * p)1982 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1983 {
1984 	socklen_t	len;
1985 	int		error;
1986 
1987 	if (p->state == STATE_CONNECT) {
1988 		if (pfd->revents & POLLOUT) {
1989 			if (pfd->revents & POLLIN) {
1990 				/* error occurred */
1991 				len = sizeof(error);
1992 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1993 				    &error, &len) == -1 || error) {
1994 					if (error)
1995 						errno = error;
1996 					if (errno != p->lasterr) {
1997 						log_peer_warn(&p->conf,
1998 						    "socket error");
1999 						p->lasterr = errno;
2000 					}
2001 					bgp_fsm(p, EVNT_CON_OPENFAIL, NULL);
2002 					return (1);
2003 				}
2004 			}
2005 			bgp_fsm(p, EVNT_CON_OPEN, NULL);
2006 			return (1);
2007 		}
2008 		if (pfd->revents & POLLHUP) {
2009 			bgp_fsm(p, EVNT_CON_OPENFAIL, NULL);
2010 			return (1);
2011 		}
2012 		if (pfd->revents & (POLLERR|POLLNVAL)) {
2013 			bgp_fsm(p, EVNT_CON_FATAL, NULL);
2014 			return (1);
2015 		}
2016 		return (0);
2017 	}
2018 
2019 	if (pfd->revents & POLLHUP) {
2020 		bgp_fsm(p, EVNT_CON_CLOSED, NULL);
2021 		return (1);
2022 	}
2023 	if (pfd->revents & (POLLERR|POLLNVAL)) {
2024 		bgp_fsm(p, EVNT_CON_FATAL, NULL);
2025 		return (1);
2026 	}
2027 
2028 	if (pfd->revents & POLLOUT && msgbuf_queuelen(p->wbuf) > 0) {
2029 		if (ibuf_write(p->fd, p->wbuf) == -1) {
2030 			if (errno == EPIPE)
2031 				log_peer_warnx(&p->conf, "Connection closed");
2032 			else
2033 				log_peer_warn(&p->conf, "write error");
2034 			bgp_fsm(p, EVNT_CON_FATAL, NULL);
2035 			return (1);
2036 		}
2037 		p->stats.last_write = getmonotime();
2038 		start_timer_sendholdtime(p);
2039 		if (p->throttled &&
2040 		    msgbuf_queuelen(p->wbuf) < SESS_MSG_LOW_MARK) {
2041 			if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
2042 				log_peer_warn(&p->conf, "imsg_compose XON");
2043 			else
2044 				p->throttled = 0;
2045 		}
2046 		if (!(pfd->revents & POLLIN))
2047 			return (1);
2048 	}
2049 
2050 	if (p->fd != -1 && pfd->revents & POLLIN) {
2051 		switch (ibuf_read(p->fd, p->wbuf)) {
2052 		case -1:
2053 			if (p->state == STATE_IDLE)
2054 				/* error already handled before */
2055 				return (1);
2056 			log_peer_warn(&p->conf, "read error");
2057 			bgp_fsm(p, EVNT_CON_FATAL, NULL);
2058 			return (1);
2059 		case 0:
2060 			bgp_fsm(p, EVNT_CON_CLOSED, NULL);
2061 			return (1);
2062 		}
2063 		p->stats.last_read = getmonotime();
2064 		return (1);
2065 	}
2066 	return (0);
2067 }
2068 
2069 void
session_process_msg(struct peer * p)2070 session_process_msg(struct peer *p)
2071 {
2072 	struct ibuf	*msg;
2073 	struct mrt	*mrt;
2074 	int		processed = 0;
2075 	uint8_t		msgtype;
2076 
2077 	p->rpending = 0;
2078 	if (p->wbuf == NULL)
2079 		return;
2080 
2081 	/*
2082 	 * session might drop to IDLE -> all buffers are flushed
2083 	 */
2084 	while ((msg = msgbuf_get(p->wbuf)) != NULL) {
2085 		/* skip msg header and extract type */
2086 		if (ibuf_skip(msg, MSGSIZE_HEADER_MARKER) == -1 ||
2087 		    ibuf_skip(msg, sizeof(uint16_t)) == -1 ||
2088 		    ibuf_get_n8(msg, &msgtype) == -1) {
2089 			log_peer_warn(&p->conf, "process message failed");
2090 			bgp_fsm(p, EVNT_CON_FATAL, NULL);
2091 			ibuf_free(msg);
2092 			return;
2093 		}
2094 		ibuf_rewind(msg);
2095 
2096 		/* dump to MRT as soon as we have a full packet */
2097 		LIST_FOREACH(mrt, &mrthead, entry) {
2098 			if (!(mrt->type == MRT_ALL_IN ||
2099 			    (msgtype == BGP_UPDATE &&
2100 			    mrt->type == MRT_UPDATE_IN)))
2101 				continue;
2102 			if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
2103 			    mrt->peer_id == p->conf.id || (mrt->group_id != 0 &&
2104 			    mrt->group_id == p->conf.groupid))
2105 				mrt_dump_bgp_msg(mrt, msg, p, msgtype);
2106 		}
2107 
2108 		ibuf_skip(msg, MSGSIZE_HEADER);
2109 
2110 		switch (msgtype) {
2111 		case BGP_OPEN:
2112 			bgp_fsm(p, EVNT_RCVD_OPEN, msg);
2113 			p->stats.msg_rcvd_open++;
2114 			break;
2115 		case BGP_UPDATE:
2116 			bgp_fsm(p, EVNT_RCVD_UPDATE, msg);
2117 			p->stats.msg_rcvd_update++;
2118 			break;
2119 		case BGP_NOTIFICATION:
2120 			bgp_fsm(p, EVNT_RCVD_NOTIFICATION, msg);
2121 			p->stats.msg_rcvd_notification++;
2122 			break;
2123 		case BGP_KEEPALIVE:
2124 			bgp_fsm(p, EVNT_RCVD_KEEPALIVE, msg);
2125 			p->stats.msg_rcvd_keepalive++;
2126 			break;
2127 		case BGP_RREFRESH:
2128 			parse_rrefresh(p, msg);
2129 			p->stats.msg_rcvd_rrefresh++;
2130 			break;
2131 		default:	/* cannot happen */
2132 			session_notification_data(p, ERR_HEADER, ERR_HDR_TYPE,
2133 			    &msgtype, 1);
2134 			log_peer_warnx(&p->conf,
2135 			    "received message with unknown type %u", msgtype);
2136 			bgp_fsm(p, EVNT_CON_FATAL, NULL);
2137 		}
2138 		ibuf_free(msg);
2139 		if (++processed > MSG_PROCESS_LIMIT) {
2140 			p->rpending = 1;
2141 			break;
2142 		}
2143 	}
2144 }
2145 
2146 struct ibuf *
parse_header(struct ibuf * msg,void * arg,int * fd)2147 parse_header(struct ibuf *msg, void *arg, int *fd)
2148 {
2149 	struct peer		*peer = arg;
2150 	struct ibuf		*b;
2151 	u_char			 m[MSGSIZE_HEADER_MARKER];
2152 	uint16_t		 len, maxlen = MAX_PKTSIZE;
2153 	uint8_t			 type;
2154 
2155 	if (ibuf_get(msg, m, sizeof(m)) == -1 ||
2156 	    ibuf_get_n16(msg, &len) == -1 ||
2157 	    ibuf_get_n8(msg, &type) == -1)
2158 		return (NULL);
2159 	/* caller MUST make sure we are getting 19 bytes! */
2160 	if (memcmp(m, marker, sizeof(marker))) {
2161 		log_peer_warnx(&peer->conf, "sync error");
2162 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL);
2163 		bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2164 		errno = EINVAL;
2165 		return (NULL);
2166 	}
2167 
2168 	if (peer->capa.ann.ext_msg)
2169 		maxlen = MAX_EXT_PKTSIZE;
2170 
2171 	if (len < MSGSIZE_HEADER || len > maxlen) {
2172 		log_peer_warnx(&peer->conf,
2173 		    "received message: illegal length: %u byte", len);
2174 		goto badlen;
2175 	}
2176 
2177 	switch (type) {
2178 	case BGP_OPEN:
2179 		if (len < MSGSIZE_OPEN_MIN || len > MAX_PKTSIZE) {
2180 			log_peer_warnx(&peer->conf,
2181 			    "received OPEN: illegal len: %u byte", len);
2182 			goto badlen;
2183 		}
2184 		break;
2185 	case BGP_NOTIFICATION:
2186 		if (len < MSGSIZE_NOTIFICATION_MIN) {
2187 			log_peer_warnx(&peer->conf,
2188 			    "received NOTIFICATION: illegal len: %u byte", len);
2189 			goto badlen;
2190 		}
2191 		break;
2192 	case BGP_UPDATE:
2193 		if (len < MSGSIZE_UPDATE_MIN) {
2194 			log_peer_warnx(&peer->conf,
2195 			    "received UPDATE: illegal len: %u byte", len);
2196 			goto badlen;
2197 		}
2198 		break;
2199 	case BGP_KEEPALIVE:
2200 		if (len != MSGSIZE_KEEPALIVE) {
2201 			log_peer_warnx(&peer->conf,
2202 			    "received KEEPALIVE: illegal len: %u byte", len);
2203 			goto badlen;
2204 		}
2205 		break;
2206 	case BGP_RREFRESH:
2207 		if (len < MSGSIZE_RREFRESH_MIN) {
2208 			log_peer_warnx(&peer->conf,
2209 			    "received RREFRESH: illegal len: %u byte", len);
2210 			goto badlen;
2211 		}
2212 		break;
2213 	default:
2214 		log_peer_warnx(&peer->conf,
2215 		    "received msg with unknown type %u", type);
2216 		session_notification_data(peer, ERR_HEADER, ERR_HDR_TYPE,
2217 		    &type, sizeof(type));
2218 		bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2219 		errno = EINVAL;
2220 		return (NULL);
2221 	}
2222 
2223 	if ((b = ibuf_open(len)) == NULL)
2224 		return (NULL);
2225 	return (b);
2226 
2227  badlen:
2228 	len = htons(len);
2229 	session_notification_data(peer, ERR_HEADER, ERR_HDR_LEN,
2230 	    &len, sizeof(len));
2231 	bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2232 	errno = ERANGE;
2233 	return (NULL);
2234 }
2235 
2236 int
parse_open(struct peer * peer,struct ibuf * msg)2237 parse_open(struct peer *peer, struct ibuf *msg)
2238 {
2239 	uint8_t		 version, rversion;
2240 	uint16_t	 short_as;
2241 	uint16_t	 holdtime, myholdtime;
2242 	uint32_t	 as, bgpid;
2243 	uint8_t		 optparamlen;
2244 
2245 	if (ibuf_get_n8(msg, &version) == -1 ||
2246 	    ibuf_get_n16(msg, &short_as) == -1 ||
2247 	    ibuf_get_n16(msg, &holdtime) == -1 ||
2248 	    ibuf_get_n32(msg, &bgpid) == -1 ||
2249 	    ibuf_get_n8(msg, &optparamlen) == -1)
2250 		goto bad_len;
2251 
2252 	if (version != BGP_VERSION) {
2253 		log_peer_warnx(&peer->conf,
2254 		    "peer wants unrecognized version %u", version);
2255 		if (version > BGP_VERSION)
2256 			rversion = version - BGP_VERSION;
2257 		else
2258 			rversion = BGP_VERSION;
2259 		session_notification_data(peer, ERR_OPEN, ERR_OPEN_VERSION,
2260 		    &rversion, sizeof(rversion));
2261 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2262 		return (-1);
2263 	}
2264 
2265 	as = peer->short_as = short_as;
2266 	if (as == 0) {
2267 		log_peer_warnx(&peer->conf,
2268 		    "peer requests unacceptable AS %u", as);
2269 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2270 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2271 		return (-1);
2272 	}
2273 
2274 	if (holdtime && holdtime < peer->conf.min_holdtime) {
2275 		log_peer_warnx(&peer->conf,
2276 		    "peer requests unacceptable holdtime %u", holdtime);
2277 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME, NULL);
2278 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2279 		return (-1);
2280 	}
2281 
2282 	myholdtime = peer->conf.holdtime;
2283 	if (!myholdtime)
2284 		myholdtime = conf->holdtime;
2285 	if (holdtime < myholdtime)
2286 		peer->holdtime = holdtime;
2287 	else
2288 		peer->holdtime = myholdtime;
2289 
2290 	/* check bgpid for validity - just disallow 0 */
2291 	if (bgpid == 0) {
2292 		log_peer_warnx(&peer->conf, "peer BGPID 0 unacceptable");
2293 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2294 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2295 		return (-1);
2296 	}
2297 	peer->remote_bgpid = bgpid;
2298 
2299 	if (optparamlen != 0) {
2300 		struct ibuf oparams, op;
2301 		uint8_t ext_type, op_type;
2302 		uint16_t ext_len, op_len;
2303 
2304 		ibuf_from_ibuf(&oparams, msg);
2305 
2306 		/* check for RFC9072 encoding */
2307 		if (ibuf_get_n8(&oparams, &ext_type) == -1)
2308 			goto bad_len;
2309 		if (ext_type == OPT_PARAM_EXT_LEN) {
2310 			if (ibuf_get_n16(&oparams, &ext_len) == -1)
2311 				goto bad_len;
2312 			/* skip RFC9072 header */
2313 			if (ibuf_skip(msg, 3) == -1)
2314 				goto bad_len;
2315 		} else {
2316 			ext_len = optparamlen;
2317 			ibuf_rewind(&oparams);
2318 		}
2319 
2320 		if (ibuf_truncate(&oparams, ext_len) == -1 ||
2321 		    ibuf_skip(msg, ext_len) == -1)
2322 			goto bad_len;
2323 
2324 		while (ibuf_size(&oparams) > 0) {
2325 			if (ibuf_get_n8(&oparams, &op_type) == -1)
2326 				goto bad_len;
2327 
2328 			if (ext_type == OPT_PARAM_EXT_LEN) {
2329 				if (ibuf_get_n16(&oparams, &op_len) == -1)
2330 					goto bad_len;
2331 			} else {
2332 				uint8_t tmp;
2333 				if (ibuf_get_n8(&oparams, &tmp) == -1)
2334 					goto bad_len;
2335 				op_len = tmp;
2336 			}
2337 
2338 			if (ibuf_get_ibuf(&oparams, op_len, &op) == -1)
2339 				goto bad_len;
2340 
2341 			switch (op_type) {
2342 			case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
2343 				if (parse_capabilities(peer, &op, &as) == -1) {
2344 					session_notification(peer, ERR_OPEN, 0,
2345 					    NULL);
2346 					change_state(peer, STATE_IDLE,
2347 					    EVNT_RCVD_OPEN);
2348 					return (-1);
2349 				}
2350 				break;
2351 			case OPT_PARAM_AUTH:			/* deprecated */
2352 			default:
2353 				/*
2354 				 * unsupported type
2355 				 * the RFCs tell us to leave the data section
2356 				 * empty and notify the peer with ERR_OPEN,
2357 				 * ERR_OPEN_OPT. How the peer should know
2358 				 * _which_ optional parameter we don't support
2359 				 * is beyond me.
2360 				 */
2361 				log_peer_warnx(&peer->conf,
2362 				    "received OPEN message with unsupported "
2363 				    "optional parameter: type %u", op_type);
2364 				session_notification(peer, ERR_OPEN,
2365 				    ERR_OPEN_OPT, NULL);
2366 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2367 				return (-1);
2368 			}
2369 		}
2370 	}
2371 
2372 	if (ibuf_size(msg) != 0) {
2373  bad_len:
2374 		log_peer_warnx(&peer->conf,
2375 		    "corrupt OPEN message received: length mismatch");
2376 		session_notification(peer, ERR_OPEN, 0, NULL);
2377 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2378 		return (-1);
2379 	}
2380 
2381 	/*
2382 	 * if remote-as is zero and it's a cloned neighbor, accept any
2383 	 * but only on the first connect, after that the remote-as needs
2384 	 * to remain the same.
2385 	 */
2386 	if (peer->template && !peer->conf.remote_as && as != AS_TRANS) {
2387 		peer->conf.remote_as = as;
2388 		peer->conf.ebgp = (peer->conf.remote_as != peer->conf.local_as);
2389 		if (!peer->conf.ebgp)
2390 			/* force enforce_as off for iBGP sessions */
2391 			peer->conf.enforce_as = ENFORCE_AS_OFF;
2392 	}
2393 
2394 	if (peer->conf.remote_as != as) {
2395 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
2396 		    log_as(as));
2397 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL);
2398 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2399 		return (-1);
2400 	}
2401 
2402 	/* on iBGP sessions check for bgpid collision */
2403 	if (!peer->conf.ebgp && peer->remote_bgpid == conf->bgpid) {
2404 		struct in_addr ina;
2405 		ina.s_addr = htonl(bgpid);
2406 		log_peer_warnx(&peer->conf, "peer BGPID %s conflicts with ours",
2407 		    inet_ntoa(ina));
2408 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID, NULL);
2409 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2410 		return (-1);
2411 	}
2412 
2413 	if (capa_neg_calc(peer) == -1) {
2414 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2415 		return (-1);
2416 	}
2417 
2418 	return (0);
2419 }
2420 
2421 int
parse_update(struct peer * peer,struct ibuf * msg)2422 parse_update(struct peer *peer, struct ibuf *msg)
2423 {
2424 	/*
2425 	 * we pass the message verbatim to the rde.
2426 	 * in case of errors the whole session is reset with a
2427 	 * notification anyway, we only need to know the peer
2428 	 */
2429 	if (imsg_rde(IMSG_UPDATE, peer->conf.id, ibuf_data(msg),
2430 	    ibuf_size(msg)) == -1)
2431 		return (-1);
2432 
2433 	return (0);
2434 }
2435 
2436 int
parse_rrefresh(struct peer * peer,struct ibuf * msg)2437 parse_rrefresh(struct peer *peer, struct ibuf *msg)
2438 {
2439 	struct route_refresh rr;
2440 	uint16_t afi, datalen;
2441 	uint8_t aid, safi, subtype;
2442 
2443 	datalen = ibuf_size(msg) + MSGSIZE_HEADER;
2444 
2445 	if (ibuf_get_n16(msg, &afi) == -1 ||
2446 	    ibuf_get_n8(msg, &subtype) == -1 ||
2447 	    ibuf_get_n8(msg, &safi) == -1) {
2448 		/* minimum size checked in session_process_msg() */
2449 		fatalx("%s: message too small", __func__);
2450 	}
2451 
2452 	/* check subtype if peer announced enhanced route refresh */
2453 	if (peer->capa.neg.enhanced_rr) {
2454 		switch (subtype) {
2455 		case ROUTE_REFRESH_REQUEST:
2456 			/* no ORF support, so no oversized RREFRESH msgs */
2457 			if (datalen != MSGSIZE_RREFRESH) {
2458 				log_peer_warnx(&peer->conf,
2459 				    "received RREFRESH: illegal len: %u byte",
2460 				    datalen);
2461 				datalen = htons(datalen);
2462 				session_notification_data(peer, ERR_HEADER,
2463 				    ERR_HDR_LEN, &datalen, sizeof(datalen));
2464 				bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2465 				return (-1);
2466 			}
2467 			peer->stats.refresh_rcvd_req++;
2468 			break;
2469 		case ROUTE_REFRESH_BEGIN_RR:
2470 		case ROUTE_REFRESH_END_RR:
2471 			/* special handling for RFC7313 */
2472 			if (datalen != MSGSIZE_RREFRESH) {
2473 				log_peer_warnx(&peer->conf,
2474 				    "received RREFRESH: illegal len: %u byte",
2475 				    datalen);
2476 				ibuf_rewind(msg);
2477 				session_notification(peer, ERR_RREFRESH,
2478 				    ERR_RR_INV_LEN, msg);
2479 				bgp_fsm(peer, EVNT_CON_FATAL, NULL);
2480 				return (-1);
2481 			}
2482 			if (subtype == ROUTE_REFRESH_BEGIN_RR)
2483 				peer->stats.refresh_rcvd_borr++;
2484 			else
2485 				peer->stats.refresh_rcvd_eorr++;
2486 			break;
2487 		default:
2488 			log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2489 			    "bad subtype %d", subtype);
2490 			return (0);
2491 		}
2492 	} else {
2493 		/* force subtype to default */
2494 		subtype = ROUTE_REFRESH_REQUEST;
2495 		peer->stats.refresh_rcvd_req++;
2496 	}
2497 
2498 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2499 	if (afi2aid(afi, safi, &aid) == -1) {
2500 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2501 		    "invalid afi/safi pair");
2502 		return (0);
2503 	}
2504 
2505 	if (!peer->capa.neg.refresh && !peer->capa.neg.enhanced_rr) {
2506 		log_peer_warnx(&peer->conf, "peer sent unexpected refresh");
2507 		return (0);
2508 	}
2509 
2510 	rr.aid = aid;
2511 	rr.subtype = subtype;
2512 
2513 	if (imsg_rde(IMSG_REFRESH, peer->conf.id, &rr, sizeof(rr)) == -1)
2514 		return (-1);
2515 
2516 	return (0);
2517 }
2518 
2519 void
parse_notification(struct peer * peer,struct ibuf * msg)2520 parse_notification(struct peer *peer, struct ibuf *msg)
2521 {
2522 	const char		*reason = "received";
2523 	uint8_t			 errcode, subcode;
2524 	uint8_t			 reason_len;
2525 	enum session_events	 event = EVNT_RCVD_NOTIFICATION;
2526 
2527 	if (ibuf_get_n8(msg, &errcode) == -1 ||
2528 	    ibuf_get_n8(msg, &subcode) == -1) {
2529 		log_peer_warnx(&peer->conf, "received bad notification");
2530 		goto done;
2531 	}
2532 
2533 	/* RFC8538: check for hard-reset or graceful notification */
2534 	if (peer->capa.neg.grestart.grnotification) {
2535 		if (errcode == ERR_CEASE && subcode == ERR_CEASE_HARD_RESET) {
2536 			if (ibuf_get_n8(msg, &errcode) == -1 ||
2537 			    ibuf_get_n8(msg, &subcode) == -1) {
2538 				log_peer_warnx(&peer->conf,
2539 				    "received bad hard-reset notification");
2540 				goto done;
2541 			}
2542 			reason = "received hard-reset";
2543 		} else {
2544 			reason = "received graceful";
2545 			event = EVNT_RCVD_GRACE_NOTIFICATION;
2546 		}
2547 	}
2548 
2549 	peer->errcnt++;
2550 	peer->stats.last_rcvd_errcode = errcode;
2551 	peer->stats.last_rcvd_suberr = subcode;
2552 
2553 	log_notification(peer, errcode, subcode, msg, reason);
2554 
2555 	CTASSERT(sizeof(peer->stats.last_reason) > UINT8_MAX);
2556 	memset(peer->stats.last_reason, 0, sizeof(peer->stats.last_reason));
2557 	if (errcode == ERR_CEASE &&
2558 	    (subcode == ERR_CEASE_ADMIN_DOWN ||
2559 	     subcode == ERR_CEASE_ADMIN_RESET)) {
2560 		/* check if shutdown reason is included */
2561 		if (ibuf_get_n8(msg, &reason_len) != -1 && reason_len != 0) {
2562 			if (ibuf_get(msg, peer->stats.last_reason,
2563 			    reason_len) == -1)
2564 				log_peer_warnx(&peer->conf,
2565 				    "received truncated shutdown reason");
2566 		}
2567 	}
2568 
2569 done:
2570 	change_state(peer, STATE_IDLE, event);
2571 }
2572 
2573 int
parse_capabilities(struct peer * peer,struct ibuf * buf,uint32_t * as)2574 parse_capabilities(struct peer *peer, struct ibuf *buf, uint32_t *as)
2575 {
2576 	struct ibuf	 capabuf;
2577 	uint16_t	 afi, nhafi, gr_header;
2578 	uint8_t		 capa_code, capa_len;
2579 	uint8_t		 safi, aid, role, flags;
2580 
2581 	while (ibuf_size(buf) > 0) {
2582 		if (ibuf_get_n8(buf, &capa_code) == -1 ||
2583 		    ibuf_get_n8(buf, &capa_len) == -1) {
2584 			log_peer_warnx(&peer->conf, "Bad capabilities attr "
2585 			    "length: too short");
2586 			return (-1);
2587 		}
2588 		if (ibuf_get_ibuf(buf, capa_len, &capabuf) == -1) {
2589 			log_peer_warnx(&peer->conf,
2590 			    "Received bad capabilities attr length: "
2591 			    "len %zu smaller than capa_len %u",
2592 			    ibuf_size(buf), capa_len);
2593 			return (-1);
2594 		}
2595 
2596 		switch (capa_code) {
2597 		case CAPA_MP:			/* RFC 4760 */
2598 			if (capa_len != 4 ||
2599 			    ibuf_get_n16(&capabuf, &afi) == -1 ||
2600 			    ibuf_skip(&capabuf, 1) == -1 ||
2601 			    ibuf_get_n8(&capabuf, &safi) == -1) {
2602 				log_peer_warnx(&peer->conf,
2603 				    "Received bad multi protocol capability");
2604 				break;
2605 			}
2606 			if (afi2aid(afi, safi, &aid) == -1) {
2607 				log_peer_warnx(&peer->conf,
2608 				    "Received multi protocol capability: "
2609 				    " unknown AFI %u, safi %u pair",
2610 				    afi, safi);
2611 				peer->capa.peer.mp[AID_UNSPEC] = 1;
2612 				break;
2613 			}
2614 			peer->capa.peer.mp[aid] = 1;
2615 			break;
2616 		case CAPA_REFRESH:
2617 			peer->capa.peer.refresh = 1;
2618 			break;
2619 		case CAPA_EXT_NEXTHOP:
2620 			while (ibuf_size(&capabuf) > 0) {
2621 				uint16_t tmp16;
2622 				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2623 				    ibuf_get_n16(&capabuf, &tmp16) == -1 ||
2624 				    ibuf_get_n16(&capabuf, &nhafi) == -1) {
2625 					log_peer_warnx(&peer->conf,
2626 					    "Received bad %s capability",
2627 					    log_capability(CAPA_EXT_NEXTHOP));
2628 					memset(peer->capa.peer.ext_nh, 0,
2629 					    sizeof(peer->capa.peer.ext_nh));
2630 					break;
2631 				}
2632 				safi = tmp16;
2633 				if (afi2aid(afi, safi, &aid) == -1 ||
2634 				    !(aid == AID_INET || aid == AID_VPN_IPv4)) {
2635 					log_peer_warnx(&peer->conf,
2636 					    "Received %s capability: "
2637 					    " unsupported AFI %u, safi %u pair",
2638 					    log_capability(CAPA_EXT_NEXTHOP),
2639 					    afi, safi);
2640 					continue;
2641 				}
2642 				if (nhafi != AFI_IPv6) {
2643 					log_peer_warnx(&peer->conf,
2644 					    "Received %s capability: "
2645 					    " unsupported nexthop AFI %u",
2646 					    log_capability(CAPA_EXT_NEXTHOP),
2647 					    nhafi);
2648 					continue;
2649 				}
2650 				peer->capa.peer.ext_nh[aid] = 1;
2651 			}
2652 			break;
2653 		case CAPA_EXT_MSG:
2654 			peer->capa.peer.ext_msg = 1;
2655 			break;
2656 		case CAPA_ROLE:
2657 			if (capa_len != 1 ||
2658 			    ibuf_get_n8(&capabuf, &role) == -1) {
2659 				log_peer_warnx(&peer->conf,
2660 				    "Received bad role capability");
2661 				break;
2662 			}
2663 			if (!peer->conf.ebgp) {
2664 				log_peer_warnx(&peer->conf,
2665 				    "Received role capability on iBGP session");
2666 				break;
2667 			}
2668 			peer->capa.peer.policy = 1;
2669 			peer->remote_role = capa2role(role);
2670 			break;
2671 		case CAPA_RESTART:
2672 			if (capa_len == 2) {
2673 				/* peer only supports EoR marker */
2674 				peer->capa.peer.grestart.restart = 1;
2675 				peer->capa.peer.grestart.timeout = 0;
2676 				break;
2677 			} else if (capa_len % 4 != 2) {
2678 				log_peer_warnx(&peer->conf,
2679 				    "Bad graceful restart capability");
2680 				peer->capa.peer.grestart.restart = 0;
2681 				peer->capa.peer.grestart.timeout = 0;
2682 				break;
2683 			}
2684 
2685 			if (ibuf_get_n16(&capabuf, &gr_header) == -1) {
2686  bad_gr_restart:
2687 				log_peer_warnx(&peer->conf,
2688 				    "Bad graceful restart capability");
2689 				peer->capa.peer.grestart.restart = 0;
2690 				peer->capa.peer.grestart.timeout = 0;
2691 				break;
2692 			}
2693 
2694 			peer->capa.peer.grestart.timeout =
2695 			    gr_header & CAPA_GR_TIMEMASK;
2696 			if (peer->capa.peer.grestart.timeout == 0) {
2697 				log_peer_warnx(&peer->conf, "Received "
2698 				    "graceful restart with zero timeout");
2699 				peer->capa.peer.grestart.restart = 0;
2700 				break;
2701 			}
2702 
2703 			while (ibuf_size(&capabuf) > 0) {
2704 				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2705 				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2706 				    ibuf_get_n8(&capabuf, &flags) == -1)
2707 					goto bad_gr_restart;
2708 				if (afi2aid(afi, safi, &aid) == -1) {
2709 					log_peer_warnx(&peer->conf,
2710 					    "Received graceful restart capa: "
2711 					    " unknown AFI %u, safi %u pair",
2712 					    afi, safi);
2713 					continue;
2714 				}
2715 				peer->capa.peer.grestart.flags[aid] |=
2716 				    CAPA_GR_PRESENT;
2717 				if (flags & CAPA_GR_F_FLAG)
2718 					peer->capa.peer.grestart.flags[aid] |=
2719 					    CAPA_GR_FORWARD;
2720 				if (gr_header & CAPA_GR_R_FLAG)
2721 					peer->capa.peer.grestart.flags[aid] |=
2722 					    CAPA_GR_RESTART;
2723 				peer->capa.peer.grestart.restart = 2;
2724 			}
2725 			if (gr_header & CAPA_GR_N_FLAG)
2726 				peer->capa.peer.grestart.grnotification = 1;
2727 			break;
2728 		case CAPA_AS4BYTE:
2729 			if (capa_len != 4 ||
2730 			    ibuf_get_n32(&capabuf, as) == -1) {
2731 				log_peer_warnx(&peer->conf,
2732 				    "Received bad AS4BYTE capability");
2733 				peer->capa.peer.as4byte = 0;
2734 				break;
2735 			}
2736 			if (*as == 0) {
2737 				log_peer_warnx(&peer->conf,
2738 				    "peer requests unacceptable AS %u", *as);
2739 				session_notification(peer, ERR_OPEN,
2740 				    ERR_OPEN_AS, NULL);
2741 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
2742 				return (-1);
2743 			}
2744 			peer->capa.peer.as4byte = 1;
2745 			break;
2746 		case CAPA_ADD_PATH:
2747 			if (capa_len % 4 != 0) {
2748  bad_add_path:
2749 				log_peer_warnx(&peer->conf,
2750 				    "Received bad ADD-PATH capability");
2751 				memset(peer->capa.peer.add_path, 0,
2752 				    sizeof(peer->capa.peer.add_path));
2753 				break;
2754 			}
2755 			while (ibuf_size(&capabuf) > 0) {
2756 				if (ibuf_get_n16(&capabuf, &afi) == -1 ||
2757 				    ibuf_get_n8(&capabuf, &safi) == -1 ||
2758 				    ibuf_get_n8(&capabuf, &flags) == -1)
2759 					goto bad_add_path;
2760 				if (afi2aid(afi, safi, &aid) == -1) {
2761 					log_peer_warnx(&peer->conf,
2762 					    "Received ADD-PATH capa: "
2763 					    " unknown AFI %u, safi %u pair",
2764 					    afi, safi);
2765 					memset(peer->capa.peer.add_path, 0,
2766 					    sizeof(peer->capa.peer.add_path));
2767 					break;
2768 				}
2769 				if (flags & ~CAPA_AP_BIDIR) {
2770 					log_peer_warnx(&peer->conf,
2771 					    "Received ADD-PATH capa: "
2772 					    " bad flags %x", flags);
2773 					memset(peer->capa.peer.add_path, 0,
2774 					    sizeof(peer->capa.peer.add_path));
2775 					break;
2776 				}
2777 				peer->capa.peer.add_path[aid] = flags;
2778 			}
2779 			break;
2780 		case CAPA_ENHANCED_RR:
2781 			peer->capa.peer.enhanced_rr = 1;
2782 			break;
2783 		default:
2784 			break;
2785 		}
2786 	}
2787 
2788 	return (0);
2789 }
2790 
2791 int
capa_neg_calc(struct peer * p)2792 capa_neg_calc(struct peer *p)
2793 {
2794 	struct ibuf *ebuf;
2795 	uint8_t	i, hasmp = 0, capa_code, capa_len, capa_aid = 0;
2796 
2797 	/* a capability is accepted only if both sides announced it */
2798 
2799 	p->capa.neg.refresh =
2800 	    (p->capa.ann.refresh && p->capa.peer.refresh) != 0;
2801 	p->capa.neg.enhanced_rr =
2802 	    (p->capa.ann.enhanced_rr && p->capa.peer.enhanced_rr) != 0;
2803 	p->capa.neg.as4byte =
2804 	    (p->capa.ann.as4byte && p->capa.peer.as4byte) != 0;
2805 	p->capa.neg.ext_msg =
2806 	    (p->capa.ann.ext_msg && p->capa.peer.ext_msg) != 0;
2807 
2808 	/* MP: both side must agree on the AFI,SAFI pair */
2809 	if (p->capa.peer.mp[AID_UNSPEC])
2810 		hasmp = 1;
2811 	for (i = AID_MIN; i < AID_MAX; i++) {
2812 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i])
2813 			p->capa.neg.mp[i] = 1;
2814 		else
2815 			p->capa.neg.mp[i] = 0;
2816 		if (p->capa.ann.mp[i] || p->capa.peer.mp[i])
2817 			hasmp = 1;
2818 	}
2819 	/* if no MP capability present default to IPv4 unicast mode */
2820 	if (!hasmp)
2821 		p->capa.neg.mp[AID_INET] = 1;
2822 
2823 	/*
2824 	 * graceful restart: the peer capabilities are of interest here.
2825 	 * It is necessary to compare the new values with the previous ones
2826 	 * and act accordingly. AFI/SAFI that are not part in the MP capability
2827 	 * are treated as not being present.
2828 	 * Also make sure that a flush happens if the session stopped
2829 	 * supporting graceful restart.
2830 	 */
2831 
2832 	for (i = AID_MIN; i < AID_MAX; i++) {
2833 		int8_t	negflags;
2834 
2835 		/* disable GR if the AFI/SAFI is not present */
2836 		if ((p->capa.peer.grestart.flags[i] & CAPA_GR_PRESENT &&
2837 		    p->capa.neg.mp[i] == 0))
2838 			p->capa.peer.grestart.flags[i] = 0;	/* disable */
2839 		/* look at current GR state and decide what to do */
2840 		negflags = p->capa.neg.grestart.flags[i];
2841 		p->capa.neg.grestart.flags[i] = p->capa.peer.grestart.flags[i];
2842 		if (negflags & CAPA_GR_RESTARTING) {
2843 			if (p->capa.ann.grestart.restart != 0 &&
2844 			    p->capa.peer.grestart.flags[i] & CAPA_GR_FORWARD) {
2845 				p->capa.neg.grestart.flags[i] |=
2846 				    CAPA_GR_RESTARTING;
2847 			} else {
2848 				if (imsg_rde(IMSG_SESSION_FLUSH, p->conf.id,
2849 				    &i, sizeof(i)) == -1) {
2850 					log_peer_warnx(&p->conf,
2851 					    "imsg send failed");
2852 					return (-1);
2853 				}
2854 				log_peer_warnx(&p->conf, "graceful restart of "
2855 				    "%s, not restarted, flushing", aid2str(i));
2856 			}
2857 		}
2858 	}
2859 	p->capa.neg.grestart.timeout = p->capa.peer.grestart.timeout;
2860 	p->capa.neg.grestart.restart = p->capa.peer.grestart.restart;
2861 	if (p->capa.ann.grestart.restart == 0)
2862 		p->capa.neg.grestart.restart = 0;
2863 
2864 	/* RFC 8538 graceful notification: both sides need to agree */
2865 	p->capa.neg.grestart.grnotification =
2866 	    (p->capa.ann.grestart.grnotification &&
2867 	    p->capa.peer.grestart.grnotification) != 0;
2868 
2869 	/* RFC 8950 extended nexthop encoding: both sides need to agree */
2870 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2871 	for (i = AID_MIN; i < AID_MAX; i++) {
2872 		if (p->capa.neg.mp[i] == 0)
2873 			continue;
2874 		if (p->capa.ann.ext_nh[i] && p->capa.peer.ext_nh[i]) {
2875 			p->capa.neg.ext_nh[i] = 1;
2876 		}
2877 	}
2878 
2879 	/*
2880 	 * ADD-PATH: set only those bits where both sides agree.
2881 	 * For this compare our send bit with the recv bit from the peer
2882 	 * and vice versa.
2883 	 * The flags are stored from this systems view point.
2884 	 * At index 0 the flags are set if any per-AID flag is set.
2885 	 */
2886 	memset(p->capa.neg.add_path, 0, sizeof(p->capa.neg.add_path));
2887 	for (i = AID_MIN; i < AID_MAX; i++) {
2888 		if (p->capa.neg.mp[i] == 0)
2889 			continue;
2890 		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV) &&
2891 		    (p->capa.peer.add_path[i] & CAPA_AP_SEND)) {
2892 			p->capa.neg.add_path[i] |= CAPA_AP_RECV;
2893 			p->capa.neg.add_path[0] |= CAPA_AP_RECV;
2894 		}
2895 		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND) &&
2896 		    (p->capa.peer.add_path[i] & CAPA_AP_RECV)) {
2897 			p->capa.neg.add_path[i] |= CAPA_AP_SEND;
2898 			p->capa.neg.add_path[0] |= CAPA_AP_SEND;
2899 		}
2900 	}
2901 
2902 	/*
2903 	 * Open policy: check that the policy is sensible.
2904 	 *
2905 	 * Make sure that the roles match and set the negotiated capability
2906 	 * to the role of the peer. So the RDE can inject the OTC attribute.
2907 	 * See RFC 9234, section 4.2.
2908 	 * These checks should only happen on ebgp sessions.
2909 	 */
2910 	if (p->capa.ann.policy != 0 && p->capa.peer.policy != 0 &&
2911 	    p->conf.ebgp) {
2912 		switch (p->conf.role) {
2913 		case ROLE_PROVIDER:
2914 			if (p->remote_role != ROLE_CUSTOMER)
2915 				goto policyfail;
2916 			break;
2917 		case ROLE_RS:
2918 			if (p->remote_role != ROLE_RS_CLIENT)
2919 				goto policyfail;
2920 			break;
2921 		case ROLE_RS_CLIENT:
2922 			if (p->remote_role != ROLE_RS)
2923 				goto policyfail;
2924 			break;
2925 		case ROLE_CUSTOMER:
2926 			if (p->remote_role != ROLE_PROVIDER)
2927 				goto policyfail;
2928 			break;
2929 		case ROLE_PEER:
2930 			if (p->remote_role != ROLE_PEER)
2931 				goto policyfail;
2932 			break;
2933 		default:
2934  policyfail:
2935 			log_peer_warnx(&p->conf, "open policy role mismatch: "
2936 			    "our role %s, their role %s",
2937 			    log_policy(p->conf.role),
2938 			    log_policy(p->remote_role));
2939 			session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2940 			return (-1);
2941 		}
2942 		p->capa.neg.policy = 1;
2943 	}
2944 
2945 	/* enforce presence of open policy role capability */
2946 	if (p->capa.ann.policy == 2 && p->capa.peer.policy == 0 &&
2947 	    p->conf.ebgp) {
2948 		log_peer_warnx(&p->conf, "open policy role enforced but "
2949 		    "not present");
2950 		session_notification(p, ERR_OPEN, ERR_OPEN_ROLE, NULL);
2951 		return (-1);
2952 	}
2953 
2954 	/* enforce presence of other capabilities */
2955 	if (p->capa.ann.refresh == 2 && p->capa.neg.refresh == 0) {
2956 		capa_code = CAPA_REFRESH;
2957 		capa_len = 0;
2958 		goto fail;
2959 	}
2960 	/* enforce presence of other capabilities */
2961 	if (p->capa.ann.ext_msg == 2 && p->capa.neg.ext_msg == 0) {
2962 		capa_code = CAPA_EXT_MSG;
2963 		capa_len = 0;
2964 		goto fail;
2965 	}
2966 	if (p->capa.ann.enhanced_rr == 2 && p->capa.neg.enhanced_rr == 0) {
2967 		capa_code = CAPA_ENHANCED_RR;
2968 		capa_len = 0;
2969 		goto fail;
2970 	}
2971 	if (p->capa.ann.as4byte == 2 && p->capa.neg.as4byte == 0) {
2972 		capa_code = CAPA_AS4BYTE;
2973 		capa_len = 4;
2974 		goto fail;
2975 	}
2976 	if (p->capa.ann.grestart.restart == 2 &&
2977 	    p->capa.neg.grestart.restart == 0) {
2978 		capa_code = CAPA_RESTART;
2979 		capa_len = 2;
2980 		goto fail;
2981 	}
2982 	for (i = AID_MIN; i < AID_MAX; i++) {
2983 		if (p->capa.ann.mp[i] == 2 && p->capa.neg.mp[i] == 0) {
2984 			capa_code = CAPA_MP;
2985 			capa_len = 4;
2986 			capa_aid = i;
2987 			goto fail;
2988 		}
2989 	}
2990 
2991 	for (i = AID_MIN; i < AID_MAX; i++) {
2992 		if (p->capa.neg.mp[i] == 0)
2993 			continue;
2994 		if ((p->capa.ann.add_path[i] & CAPA_AP_RECV_ENFORCE) &&
2995 		    (p->capa.neg.add_path[i] & CAPA_AP_RECV) == 0) {
2996 			capa_code = CAPA_ADD_PATH;
2997 			capa_len = 4;
2998 			capa_aid = i;
2999 			goto fail;
3000 		}
3001 		if ((p->capa.ann.add_path[i] & CAPA_AP_SEND_ENFORCE) &&
3002 		    (p->capa.neg.add_path[i] & CAPA_AP_SEND) == 0) {
3003 			capa_code = CAPA_ADD_PATH;
3004 			capa_len = 4;
3005 			capa_aid = i;
3006 			goto fail;
3007 		}
3008 	}
3009 
3010 	for (i = AID_MIN; i < AID_MAX; i++) {
3011 		if (p->capa.neg.mp[i] == 0)
3012 			continue;
3013 		if (p->capa.ann.ext_nh[i] == 2 &&
3014 		    p->capa.neg.ext_nh[i] == 0) {
3015 			capa_code = CAPA_EXT_NEXTHOP;
3016 			capa_len = 6;
3017 			capa_aid = i;
3018 			goto fail;
3019 		}
3020 	}
3021 	return (0);
3022 
3023  fail:
3024 	if ((ebuf = ibuf_dynamic(2, 256)) == NULL)
3025 		return (-1);
3026 	/* best effort, no problem if it fails */
3027 	session_capa_add(ebuf, capa_code, capa_len);
3028 	if (capa_code == CAPA_MP)
3029 		session_capa_add_mp(ebuf, capa_aid);
3030 	else if (capa_code == CAPA_ADD_PATH)
3031 		session_capa_add_afi(ebuf, capa_aid, 0);
3032 	else if (capa_code == CAPA_EXT_NEXTHOP)
3033 		session_capa_add_ext_nh(ebuf, capa_aid);
3034 	else if (capa_len > 0)
3035 		ibuf_add_zero(ebuf, capa_len);
3036 
3037 	session_notification(p, ERR_OPEN, ERR_OPEN_CAPA, ebuf);
3038 	ibuf_free(ebuf);
3039 	return (-1);
3040 }
3041 
3042 void
session_dispatch_imsg(struct imsgbuf * imsgbuf,int idx,u_int * listener_cnt)3043 session_dispatch_imsg(struct imsgbuf *imsgbuf, int idx, u_int *listener_cnt)
3044 {
3045 	struct imsg		 imsg;
3046 	struct ibuf		 ibuf;
3047 	struct mrt		 xmrt;
3048 	struct route_refresh	 rr;
3049 	struct mrt		*mrt;
3050 	struct imsgbuf		*i;
3051 	struct peer		*p;
3052 	struct listen_addr	*la, *next, nla;
3053 	struct session_dependon	 sdon;
3054 	struct bgpd_config	 tconf;
3055 	uint32_t		 peerid;
3056 	int			 n, fd, depend_ok, restricted;
3057 	uint16_t		 t;
3058 	uint8_t			 aid, errcode, subcode;
3059 
3060 	while (imsgbuf) {
3061 		if ((n = imsg_get(imsgbuf, &imsg)) == -1)
3062 			fatal("session_dispatch_imsg: imsg_get error");
3063 
3064 		if (n == 0)
3065 			break;
3066 
3067 		peerid = imsg_get_id(&imsg);
3068 		switch (imsg_get_type(&imsg)) {
3069 		case IMSG_SOCKET_CONN:
3070 		case IMSG_SOCKET_CONN_CTL:
3071 			if (idx != PFD_PIPE_MAIN)
3072 				fatalx("reconf request not from parent");
3073 			if ((fd = imsg_get_fd(&imsg)) == -1) {
3074 				log_warnx("expected to receive imsg fd to "
3075 				    "RDE but didn't receive any");
3076 				break;
3077 			}
3078 			if ((i = malloc(sizeof(struct imsgbuf))) == NULL)
3079 				fatal(NULL);
3080 			if (imsgbuf_init(i, fd) == -1 ||
3081 			    imsgbuf_set_maxsize(i, MAX_BGPD_IMSGSIZE) == -1)
3082 				fatal(NULL);
3083 			if (imsg_get_type(&imsg) == IMSG_SOCKET_CONN) {
3084 				if (ibuf_rde) {
3085 					log_warnx("Unexpected imsg connection "
3086 					    "to RDE received");
3087 					imsgbuf_clear(ibuf_rde);
3088 					free(ibuf_rde);
3089 				}
3090 				ibuf_rde = i;
3091 			} else {
3092 				if (ibuf_rde_ctl) {
3093 					log_warnx("Unexpected imsg ctl "
3094 					    "connection to RDE received");
3095 					imsgbuf_clear(ibuf_rde_ctl);
3096 					free(ibuf_rde_ctl);
3097 				}
3098 				ibuf_rde_ctl = i;
3099 			}
3100 			break;
3101 		case IMSG_RECONF_CONF:
3102 			if (idx != PFD_PIPE_MAIN)
3103 				fatalx("reconf request not from parent");
3104 			if (imsg_get_data(&imsg, &tconf, sizeof(tconf)) == -1)
3105 				fatal("imsg_get_data");
3106 
3107 			nconf = new_config();
3108 			copy_config(nconf, &tconf);
3109 			pending_reconf = 1;
3110 			break;
3111 		case IMSG_RECONF_PEER:
3112 			if (idx != PFD_PIPE_MAIN)
3113 				fatalx("reconf request not from parent");
3114 			if ((p = calloc(1, sizeof(struct peer))) == NULL)
3115 				fatal("new_peer");
3116 			if (imsg_get_data(&imsg, &p->conf, sizeof(p->conf)) ==
3117 			    -1)
3118 				fatal("imsg_get_data");
3119 			p->state = p->prev_state = STATE_NONE;
3120 			p->reconf_action = RECONF_REINIT;
3121 			if (RB_INSERT(peer_head, &nconf->peers, p) != NULL)
3122 				fatalx("%s: peer tree is corrupt", __func__);
3123 			break;
3124 		case IMSG_RECONF_PEER_AUTH:
3125 			if (idx != PFD_PIPE_MAIN)
3126 				fatalx("reconf request not from parent");
3127 			if ((p = getpeerbyid(nconf, peerid)) == NULL) {
3128 				log_warnx("no such peer: id=%u", peerid);
3129 				break;
3130 			}
3131 			if (pfkey_recv_conf(p, &imsg) == -1)
3132 				fatal("pfkey_recv_conf");
3133 			break;
3134 		case IMSG_RECONF_LISTENER:
3135 			if (idx != PFD_PIPE_MAIN)
3136 				fatalx("reconf request not from parent");
3137 			if (nconf == NULL)
3138 				fatalx("IMSG_RECONF_LISTENER but no config");
3139 			if (imsg_get_data(&imsg, &nla, sizeof(nla)) == -1)
3140 				fatal("imsg_get_data");
3141 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
3142 				if (!la_cmp(la, &nla))
3143 					break;
3144 
3145 			if (la == NULL) {
3146 				if (nla.reconf != RECONF_REINIT)
3147 					fatalx("king bula sez: "
3148 					    "expected REINIT");
3149 
3150 				if ((nla.fd = imsg_get_fd(&imsg)) == -1)
3151 					log_warnx("expected to receive fd for "
3152 					    "%s but didn't receive any",
3153 					    log_sockaddr((struct sockaddr *)
3154 					    &nla.sa, nla.sa_len));
3155 
3156 				la = calloc(1, sizeof(struct listen_addr));
3157 				if (la == NULL)
3158 					fatal(NULL);
3159 				memcpy(&la->sa, &nla.sa, sizeof(la->sa));
3160 				la->flags = nla.flags;
3161 				la->fd = nla.fd;
3162 				la->reconf = RECONF_REINIT;
3163 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
3164 				    entry);
3165 			} else {
3166 				if (nla.reconf != RECONF_KEEP)
3167 					fatalx("king bula sez: expected KEEP");
3168 				la->reconf = RECONF_KEEP;
3169 			}
3170 
3171 			break;
3172 		case IMSG_RECONF_CTRL:
3173 			if (idx != PFD_PIPE_MAIN)
3174 				fatalx("reconf request not from parent");
3175 
3176 			if (imsg_get_data(&imsg, &restricted,
3177 			    sizeof(restricted)) == -1)
3178 				fatal("imsg_get_data");
3179 			if ((fd = imsg_get_fd(&imsg)) == -1) {
3180 				log_warnx("expected to receive fd for control "
3181 				    "socket but didn't receive any");
3182 				break;
3183 			}
3184 			if (restricted) {
3185 				control_shutdown(rcsock);
3186 				rcsock = fd;
3187 			} else {
3188 				control_shutdown(csock);
3189 				csock = fd;
3190 			}
3191 			break;
3192 		case IMSG_RECONF_DRAIN:
3193 			switch (idx) {
3194 			case PFD_PIPE_ROUTE:
3195 				if (nconf != NULL)
3196 					fatalx("got unexpected %s from RDE",
3197 					    "IMSG_RECONF_DONE");
3198 				imsg_compose(ibuf_main, IMSG_RECONF_DONE, 0, 0,
3199 				    -1, NULL, 0);
3200 				break;
3201 			case PFD_PIPE_MAIN:
3202 				if (nconf == NULL)
3203 					fatalx("got unexpected %s from parent",
3204 					    "IMSG_RECONF_DONE");
3205 				imsg_compose(ibuf_main, IMSG_RECONF_DRAIN, 0, 0,
3206 				    -1, NULL, 0);
3207 				break;
3208 			default:
3209 				fatalx("reconf request not from parent or RDE");
3210 			}
3211 			break;
3212 		case IMSG_RECONF_DONE:
3213 			if (idx != PFD_PIPE_MAIN)
3214 				fatalx("reconf request not from parent");
3215 			if (nconf == NULL)
3216 				fatalx("got IMSG_RECONF_DONE but no config");
3217 			copy_config(conf, nconf);
3218 			merge_peers(conf, nconf);
3219 
3220 			/* delete old listeners */
3221 			TAILQ_FOREACH_SAFE(la, conf->listen_addrs, entry,
3222 			    next) {
3223 				if (la->reconf == RECONF_NONE) {
3224 					log_info("not listening on %s any more",
3225 					    log_sockaddr((struct sockaddr *)
3226 					    &la->sa, la->sa_len));
3227 					TAILQ_REMOVE(conf->listen_addrs, la,
3228 					    entry);
3229 					close(la->fd);
3230 					free(la);
3231 				}
3232 			}
3233 
3234 			/* add new listeners */
3235 			TAILQ_CONCAT(conf->listen_addrs, nconf->listen_addrs,
3236 			    entry);
3237 
3238 			setup_listeners(listener_cnt);
3239 			free_config(nconf);
3240 			nconf = NULL;
3241 			pending_reconf = 0;
3242 			log_info("SE reconfigured");
3243 			/*
3244 			 * IMSG_RECONF_DONE is sent when the RDE drained
3245 			 * the peer config sent in merge_peers().
3246 			 */
3247 			break;
3248 		case IMSG_SESSION_DEPENDON:
3249 			if (idx != PFD_PIPE_MAIN)
3250 				fatalx("IFINFO message not from parent");
3251 			if (imsg_get_data(&imsg, &sdon, sizeof(sdon)) == -1)
3252 				fatalx("DEPENDON imsg with wrong len");
3253 			depend_ok = sdon.depend_state;
3254 
3255 			RB_FOREACH(p, peer_head, &conf->peers)
3256 				if (!strcmp(p->conf.if_depend, sdon.ifname)) {
3257 					if (depend_ok && !p->depend_ok) {
3258 						p->depend_ok = depend_ok;
3259 						bgp_fsm(p, EVNT_START, NULL);
3260 					} else if (!depend_ok && p->depend_ok) {
3261 						p->depend_ok = depend_ok;
3262 						session_stop(p,
3263 						    ERR_CEASE_OTHER_CHANGE,
3264 						    NULL);
3265 					}
3266 				}
3267 			break;
3268 		case IMSG_MRT_OPEN:
3269 		case IMSG_MRT_REOPEN:
3270 			if (idx != PFD_PIPE_MAIN)
3271 				fatalx("mrt request not from parent");
3272 			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3273 				log_warnx("mrt open, wrong imsg len");
3274 				break;
3275 			}
3276 
3277 			if ((xmrt.fd = imsg_get_fd(&imsg)) == -1) {
3278 				log_warnx("expected to receive fd for mrt dump "
3279 				    "but didn't receive any");
3280 				break;
3281 			}
3282 
3283 			mrt = mrt_get(&mrthead, &xmrt);
3284 			if (mrt == NULL) {
3285 				/* new dump */
3286 				mrt = calloc(1, sizeof(struct mrt));
3287 				if (mrt == NULL)
3288 					fatal("session_dispatch_imsg");
3289 				memcpy(mrt, &xmrt, sizeof(struct mrt));
3290 				if ((mrt->wbuf = msgbuf_new()) == NULL)
3291 					fatal("session_dispatch_imsg");
3292 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
3293 			} else {
3294 				/* old dump reopened */
3295 				close(mrt->fd);
3296 			}
3297 			mrt->fd = xmrt.fd;
3298 			break;
3299 		case IMSG_MRT_CLOSE:
3300 			if (idx != PFD_PIPE_MAIN)
3301 				fatalx("mrt request not from parent");
3302 			if (imsg_get_data(&imsg, &xmrt, sizeof(xmrt)) == -1) {
3303 				log_warnx("mrt close, wrong imsg len");
3304 				break;
3305 			}
3306 
3307 			mrt = mrt_get(&mrthead, &xmrt);
3308 			if (mrt != NULL)
3309 				mrt_done(mrt);
3310 			break;
3311 		case IMSG_CTL_KROUTE:
3312 		case IMSG_CTL_KROUTE_ADDR:
3313 		case IMSG_CTL_SHOW_NEXTHOP:
3314 		case IMSG_CTL_SHOW_INTERFACE:
3315 		case IMSG_CTL_SHOW_FIB_TABLES:
3316 		case IMSG_CTL_SHOW_RTR:
3317 		case IMSG_CTL_SHOW_TIMER:
3318 			if (idx != PFD_PIPE_MAIN)
3319 				fatalx("ctl kroute request not from parent");
3320 			control_imsg_relay(&imsg, NULL);
3321 			break;
3322 		case IMSG_CTL_SHOW_NEIGHBOR:
3323 			if (idx != PFD_PIPE_ROUTE_CTL)
3324 				fatalx("ctl rib request not from RDE");
3325 			p = getpeerbyid(conf, peerid);
3326 			control_imsg_relay(&imsg, p);
3327 			break;
3328 		case IMSG_CTL_SHOW_RIB:
3329 		case IMSG_CTL_SHOW_RIB_PREFIX:
3330 		case IMSG_CTL_SHOW_RIB_COMMUNITIES:
3331 		case IMSG_CTL_SHOW_RIB_ATTR:
3332 		case IMSG_CTL_SHOW_RIB_MEM:
3333 		case IMSG_CTL_SHOW_NETWORK:
3334 		case IMSG_CTL_SHOW_FLOWSPEC:
3335 		case IMSG_CTL_SHOW_SET:
3336 			if (idx != PFD_PIPE_ROUTE_CTL)
3337 				fatalx("ctl rib request not from RDE");
3338 			control_imsg_relay(&imsg, NULL);
3339 			break;
3340 		case IMSG_CTL_END:
3341 		case IMSG_CTL_RESULT:
3342 			control_imsg_relay(&imsg, NULL);
3343 			break;
3344 		case IMSG_UPDATE:
3345 			if (idx != PFD_PIPE_ROUTE)
3346 				fatalx("update request not from RDE");
3347 			if (imsg_get_ibuf(&imsg, &ibuf) == -1)
3348 				log_warn("RDE sent invalid update");
3349 			else
3350 				session_update(peerid, &ibuf);
3351 			break;
3352 		case IMSG_UPDATE_ERR:
3353 			if (idx != PFD_PIPE_ROUTE)
3354 				fatalx("update request not from RDE");
3355 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3356 				log_warnx("no such peer: id=%u", peerid);
3357 				break;
3358 			}
3359 			if (imsg_get_ibuf(&imsg, &ibuf) == -1 ||
3360 			    ibuf_get_n8(&ibuf, &errcode) == -1 ||
3361 			    ibuf_get_n8(&ibuf, &subcode) == -1) {
3362 				log_warnx("RDE sent invalid notification");
3363 				break;
3364 			}
3365 
3366 			session_notification(p, errcode, subcode, &ibuf);
3367 			switch (errcode) {
3368 			case ERR_CEASE:
3369 				switch (subcode) {
3370 				case ERR_CEASE_MAX_PREFIX:
3371 				case ERR_CEASE_MAX_SENT_PREFIX:
3372 					t = p->conf.max_out_prefix_restart;
3373 					if (subcode == ERR_CEASE_MAX_PREFIX)
3374 						t = p->conf.max_prefix_restart;
3375 
3376 					bgp_fsm(p, EVNT_STOP, NULL);
3377 					if (t)
3378 						timer_set(&p->timers,
3379 						    Timer_IdleHold, 60 * t);
3380 					break;
3381 				default:
3382 					bgp_fsm(p, EVNT_CON_FATAL, NULL);
3383 					break;
3384 				}
3385 				break;
3386 			default:
3387 				bgp_fsm(p, EVNT_CON_FATAL, NULL);
3388 				break;
3389 			}
3390 			break;
3391 		case IMSG_REFRESH:
3392 			if (idx != PFD_PIPE_ROUTE)
3393 				fatalx("route refresh request not from RDE");
3394 			if (imsg_get_data(&imsg, &rr, sizeof(rr)) == -1) {
3395 				log_warnx("RDE sent invalid refresh msg");
3396 				break;
3397 			}
3398 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3399 				log_warnx("no such peer: id=%u", peerid);
3400 				break;
3401 			}
3402 			if (rr.aid < AID_MIN || rr.aid >= AID_MAX)
3403 				fatalx("IMSG_REFRESH: bad AID");
3404 			session_rrefresh(p, rr.aid, rr.subtype);
3405 			break;
3406 		case IMSG_SESSION_RESTARTED:
3407 			if (idx != PFD_PIPE_ROUTE)
3408 				fatalx("session restart not from RDE");
3409 			if (imsg_get_data(&imsg, &aid, sizeof(aid)) == -1) {
3410 				log_warnx("RDE sent invalid restart msg");
3411 				break;
3412 			}
3413 			if ((p = getpeerbyid(conf, peerid)) == NULL) {
3414 				log_warnx("no such peer: id=%u", peerid);
3415 				break;
3416 			}
3417 			if (aid < AID_MIN || aid >= AID_MAX)
3418 				fatalx("IMSG_SESSION_RESTARTED: bad AID");
3419 			if (p->capa.neg.grestart.flags[aid] &
3420 			    CAPA_GR_RESTARTING) {
3421 				log_peer_warnx(&p->conf,
3422 				    "graceful restart of %s finished",
3423 				    aid2str(aid));
3424 				p->capa.neg.grestart.flags[aid] &=
3425 				    ~CAPA_GR_RESTARTING;
3426 				timer_stop(&p->timers, Timer_RestartTimeout);
3427 
3428 				/* signal back to RDE to cleanup stale routes */
3429 				if (imsg_rde(IMSG_SESSION_RESTARTED,
3430 				    peerid, &aid, sizeof(aid)) == -1)
3431 					fatal("imsg_compose: "
3432 					    "IMSG_SESSION_RESTARTED");
3433 			}
3434 			break;
3435 		default:
3436 			break;
3437 		}
3438 		imsg_free(&imsg);
3439 	}
3440 }
3441 
3442 int
la_cmp(struct listen_addr * a,struct listen_addr * b)3443 la_cmp(struct listen_addr *a, struct listen_addr *b)
3444 {
3445 	struct sockaddr_in	*in_a, *in_b;
3446 	struct sockaddr_in6	*in6_a, *in6_b;
3447 
3448 	if (a->sa.ss_family != b->sa.ss_family)
3449 		return (1);
3450 
3451 	switch (a->sa.ss_family) {
3452 	case AF_INET:
3453 		in_a = (struct sockaddr_in *)&a->sa;
3454 		in_b = (struct sockaddr_in *)&b->sa;
3455 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
3456 			return (1);
3457 		if (in_a->sin_port != in_b->sin_port)
3458 			return (1);
3459 		break;
3460 	case AF_INET6:
3461 		in6_a = (struct sockaddr_in6 *)&a->sa;
3462 		in6_b = (struct sockaddr_in6 *)&b->sa;
3463 		if (memcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
3464 		    sizeof(struct in6_addr)))
3465 			return (1);
3466 		if (in6_a->sin6_port != in6_b->sin6_port)
3467 			return (1);
3468 		break;
3469 	default:
3470 		fatal("king bula sez: unknown address family");
3471 		/* NOTREACHED */
3472 	}
3473 
3474 	return (0);
3475 }
3476 
3477 struct peer *
getpeerbydesc(struct bgpd_config * c,const char * descr)3478 getpeerbydesc(struct bgpd_config *c, const char *descr)
3479 {
3480 	struct peer	*p, *res = NULL;
3481 	int		 match = 0;
3482 
3483 	RB_FOREACH(p, peer_head, &c->peers)
3484 		if (!strcmp(p->conf.descr, descr)) {
3485 			res = p;
3486 			match++;
3487 		}
3488 
3489 	if (match > 1)
3490 		log_info("neighbor description \"%s\" not unique, request "
3491 		    "aborted", descr);
3492 
3493 	if (match == 1)
3494 		return (res);
3495 	else
3496 		return (NULL);
3497 }
3498 
3499 struct peer *
getpeerbyip(struct bgpd_config * c,struct sockaddr * ip)3500 getpeerbyip(struct bgpd_config *c, struct sockaddr *ip)
3501 {
3502 	struct bgpd_addr addr;
3503 	struct peer	*p, *newpeer, *loose = NULL;
3504 	uint32_t	 id;
3505 
3506 	sa2addr(ip, &addr, NULL);
3507 
3508 	/* we might want a more effective way to find peers by IP */
3509 	RB_FOREACH(p, peer_head, &c->peers)
3510 		if (!p->conf.template &&
3511 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
3512 			return (p);
3513 
3514 	/* try template matching */
3515 	RB_FOREACH(p, peer_head, &c->peers)
3516 		if (p->conf.template &&
3517 		    p->conf.remote_addr.aid == addr.aid &&
3518 		    session_match_mask(p, &addr))
3519 			if (loose == NULL || loose->conf.remote_masklen <
3520 			    p->conf.remote_masklen)
3521 				loose = p;
3522 
3523 	if (loose != NULL) {
3524 		/* clone */
3525 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
3526 			fatal(NULL);
3527 		memcpy(newpeer, loose, sizeof(struct peer));
3528 		for (id = PEER_ID_DYN_MAX; id > PEER_ID_STATIC_MAX; id--) {
3529 			if (getpeerbyid(c, id) == NULL)	/* we found a free id */
3530 				break;
3531 		}
3532 		newpeer->template = loose;
3533 		session_template_clone(newpeer, ip, id, 0);
3534 		newpeer->state = newpeer->prev_state = STATE_NONE;
3535 		newpeer->reconf_action = RECONF_KEEP;
3536 		newpeer->rpending = 0;
3537 		newpeer->wbuf = NULL;
3538 		init_peer(newpeer);
3539 		/* start delete timer, it is stopped when session goes up. */
3540 		timer_set(&newpeer->timers, Timer_SessionDown,
3541 		    INTERVAL_SESSION_DOWN);
3542 		bgp_fsm(newpeer, EVNT_START, NULL);
3543 		if (RB_INSERT(peer_head, &c->peers, newpeer) != NULL)
3544 			fatalx("%s: peer tree is corrupt", __func__);
3545 		return (newpeer);
3546 	}
3547 
3548 	return (NULL);
3549 }
3550 
3551 struct peer *
getpeerbyid(struct bgpd_config * c,uint32_t peerid)3552 getpeerbyid(struct bgpd_config *c, uint32_t peerid)
3553 {
3554 	static struct peer lookup;
3555 
3556 	lookup.conf.id = peerid;
3557 
3558 	return RB_FIND(peer_head, &c->peers, &lookup);
3559 }
3560 
3561 int
peer_matched(struct peer * p,struct ctl_neighbor * n)3562 peer_matched(struct peer *p, struct ctl_neighbor *n)
3563 {
3564 	char *s;
3565 
3566 	if (n && n->addr.aid) {
3567 		if (memcmp(&p->conf.remote_addr, &n->addr,
3568 		    sizeof(p->conf.remote_addr)))
3569 			return 0;
3570 	} else if (n && n->descr[0]) {
3571 		s = n->is_group ? p->conf.group : p->conf.descr;
3572 		/* cannot trust n->descr to be properly terminated */
3573 		if (strncmp(s, n->descr, sizeof(n->descr)))
3574 			return 0;
3575 	}
3576 	return 1;
3577 }
3578 
3579 void
session_template_clone(struct peer * p,struct sockaddr * ip,uint32_t id,uint32_t as)3580 session_template_clone(struct peer *p, struct sockaddr *ip, uint32_t id,
3581     uint32_t as)
3582 {
3583 	struct bgpd_addr	remote_addr;
3584 
3585 	if (ip)
3586 		sa2addr(ip, &remote_addr, NULL);
3587 	else
3588 		memcpy(&remote_addr, &p->conf.remote_addr, sizeof(remote_addr));
3589 
3590 	memcpy(&p->conf, &p->template->conf, sizeof(struct peer_config));
3591 
3592 	p->conf.id = id;
3593 
3594 	if (as) {
3595 		p->conf.remote_as = as;
3596 		p->conf.ebgp = (p->conf.remote_as != p->conf.local_as);
3597 		if (!p->conf.ebgp)
3598 			/* force enforce_as off for iBGP sessions */
3599 			p->conf.enforce_as = ENFORCE_AS_OFF;
3600 	}
3601 
3602 	memcpy(&p->conf.remote_addr, &remote_addr, sizeof(remote_addr));
3603 	switch (p->conf.remote_addr.aid) {
3604 	case AID_INET:
3605 		p->conf.remote_masklen = 32;
3606 		break;
3607 	case AID_INET6:
3608 		p->conf.remote_masklen = 128;
3609 		break;
3610 	}
3611 	p->conf.template = 0;
3612 }
3613 
3614 int
session_match_mask(struct peer * p,struct bgpd_addr * a)3615 session_match_mask(struct peer *p, struct bgpd_addr *a)
3616 {
3617 	struct bgpd_addr masked;
3618 
3619 	applymask(&masked, a, p->conf.remote_masklen);
3620 	if (memcmp(&masked, &p->conf.remote_addr, sizeof(masked)) == 0)
3621 		return (1);
3622 	return (0);
3623 }
3624 
3625 void
session_down(struct peer * peer)3626 session_down(struct peer *peer)
3627 {
3628 	memset(&peer->capa.neg, 0, sizeof(peer->capa.neg));
3629 	peer->stats.last_updown = getmonotime();
3630 
3631 	timer_set(&peer->timers, Timer_SessionDown, INTERVAL_SESSION_DOWN);
3632 
3633 	/*
3634 	 * session_down is called in the exit code path so check
3635 	 * if the RDE is still around, if not there is no need to
3636 	 * send the message.
3637 	 */
3638 	if (ibuf_rde == NULL)
3639 		return;
3640 	if (imsg_rde(IMSG_SESSION_DOWN, peer->conf.id, NULL, 0) == -1)
3641 		fatalx("imsg_compose error");
3642 }
3643 
3644 void
session_up(struct peer * p)3645 session_up(struct peer *p)
3646 {
3647 	struct session_up	 sup;
3648 
3649 	/* clear last errors, now that the session is up */
3650 	p->stats.last_sent_errcode = 0;
3651 	p->stats.last_sent_suberr = 0;
3652 	p->stats.last_rcvd_errcode = 0;
3653 	p->stats.last_rcvd_suberr = 0;
3654 	memset(p->stats.last_reason, 0, sizeof(p->stats.last_reason));
3655 
3656 	timer_stop(&p->timers, Timer_SessionDown);
3657 
3658 	if (!p->rdesession) {
3659 		/* inform rde about new peer */
3660 		if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3661 		    &p->conf, sizeof(p->conf)) == -1)
3662 			fatalx("imsg_compose error");
3663 		p->rdesession = 1;
3664 	}
3665 
3666 	if (p->local.aid == AID_INET) {
3667 		sup.local_v4_addr = p->local;
3668 		sup.local_v6_addr = p->local_alt;
3669 	} else {
3670 		sup.local_v6_addr = p->local;
3671 		sup.local_v4_addr = p->local_alt;
3672 	}
3673 	sup.remote_addr = p->remote;
3674 	sup.if_scope = p->if_scope;
3675 
3676 	sup.remote_bgpid = p->remote_bgpid;
3677 	sup.short_as = p->short_as;
3678 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
3679 	p->stats.last_updown = getmonotime();
3680 	if (imsg_rde(IMSG_SESSION_UP, p->conf.id, &sup, sizeof(sup)) == -1)
3681 		fatalx("imsg_compose error");
3682 }
3683 
3684 int
imsg_ctl_parent(struct imsg * imsg)3685 imsg_ctl_parent(struct imsg *imsg)
3686 {
3687 	return imsg_forward(ibuf_main, imsg);
3688 }
3689 
3690 int
imsg_ctl_rde(struct imsg * imsg)3691 imsg_ctl_rde(struct imsg *imsg)
3692 {
3693 	if (ibuf_rde_ctl == NULL)
3694 		return (0);
3695 	/*
3696 	 * Use control socket to talk to RDE to bypass the queue of the
3697 	 * regular imsg socket.
3698 	 */
3699 	return imsg_forward(ibuf_rde_ctl, imsg);
3700 }
3701 
3702 int
imsg_ctl_rde_msg(int type,uint32_t peerid,pid_t pid)3703 imsg_ctl_rde_msg(int type, uint32_t peerid, pid_t pid)
3704 {
3705 	if (ibuf_rde_ctl == NULL)
3706 		return (0);
3707 
3708 	/*
3709 	 * Use control socket to talk to RDE to bypass the queue of the
3710 	 * regular imsg socket.
3711 	 */
3712 	return imsg_compose(ibuf_rde_ctl, type, peerid, pid, -1, NULL, 0);
3713 }
3714 
3715 int
imsg_rde(int type,uint32_t peerid,void * data,uint16_t datalen)3716 imsg_rde(int type, uint32_t peerid, void *data, uint16_t datalen)
3717 {
3718 	if (ibuf_rde == NULL)
3719 		return (0);
3720 
3721 	return imsg_compose(ibuf_rde, type, peerid, 0, -1, data, datalen);
3722 }
3723 
3724 void
session_demote(struct peer * p,int level)3725 session_demote(struct peer *p, int level)
3726 {
3727 	struct demote_msg	msg;
3728 
3729 	strlcpy(msg.demote_group, p->conf.demote_group,
3730 	    sizeof(msg.demote_group));
3731 	msg.level = level;
3732 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
3733 	    &msg, sizeof(msg)) == -1)
3734 		fatalx("imsg_compose error");
3735 
3736 	p->demoted += level;
3737 }
3738 
3739 void
session_stop(struct peer * peer,uint8_t subcode,const char * reason)3740 session_stop(struct peer *peer, uint8_t subcode, const char *reason)
3741 {
3742 	struct ibuf *ibuf;
3743 
3744 	if (reason != NULL)
3745 		strlcpy(peer->conf.reason, reason, sizeof(peer->conf.reason));
3746 
3747 	ibuf = ibuf_dynamic(0, REASON_LEN);
3748 
3749 	if ((subcode == ERR_CEASE_ADMIN_DOWN ||
3750 	    subcode == ERR_CEASE_ADMIN_RESET) &&
3751 	    reason != NULL && *reason != '\0' &&
3752 	    ibuf != NULL) {
3753 		if (ibuf_add_n8(ibuf, strlen(reason)) == -1 ||
3754 		    ibuf_add(ibuf, reason, strlen(reason))) {
3755 			log_peer_warnx(&peer->conf,
3756 			    "trying to send overly long shutdown reason");
3757 			ibuf_free(ibuf);
3758 			ibuf = NULL;
3759 		}
3760 	}
3761 	switch (peer->state) {
3762 	case STATE_OPENSENT:
3763 	case STATE_OPENCONFIRM:
3764 	case STATE_ESTABLISHED:
3765 		session_notification(peer, ERR_CEASE, subcode, ibuf);
3766 		break;
3767 	default:
3768 		/* session not open, no need to send notification */
3769 		if (subcode >= sizeof(suberr_cease_names) / sizeof(char *) ||
3770 		    suberr_cease_names[subcode] == NULL)
3771 			log_peer_warnx(&peer->conf, "session stop: %s, "
3772 			    "unknown subcode %u", errnames[ERR_CEASE], subcode);
3773 		else
3774 			log_peer_warnx(&peer->conf, "session stop: %s, %s",
3775 			    errnames[ERR_CEASE], suberr_cease_names[subcode]);
3776 		break;
3777 	}
3778 	ibuf_free(ibuf);
3779 	bgp_fsm(peer, EVNT_STOP, NULL);
3780 }
3781 
3782 struct bgpd_addr *
session_localaddr(struct peer * p)3783 session_localaddr(struct peer *p)
3784 {
3785 	switch (p->conf.remote_addr.aid) {
3786 	case AID_INET:
3787 		return &p->conf.local_addr_v4;
3788 	case AID_INET6:
3789 		return &p->conf.local_addr_v6;
3790 	}
3791 	fatalx("Unknown AID in %s", __func__);
3792 }
3793 
3794 void
merge_peers(struct bgpd_config * c,struct bgpd_config * nc)3795 merge_peers(struct bgpd_config *c, struct bgpd_config *nc)
3796 {
3797 	struct peer *p, *np, *next;
3798 
3799 	RB_FOREACH(p, peer_head, &c->peers) {
3800 		/* templates are handled specially */
3801 		if (p->template != NULL)
3802 			continue;
3803 		np = getpeerbyid(nc, p->conf.id);
3804 		if (np == NULL) {
3805 			p->reconf_action = RECONF_DELETE;
3806 			continue;
3807 		}
3808 
3809 		/* peer no longer uses TCP MD5SIG so deconfigure */
3810 		if (p->auth_conf.method == AUTH_MD5SIG &&
3811 		    np->auth_conf.method != AUTH_MD5SIG)
3812 			tcp_md5_del_listener(c, p);
3813 		else if (np->auth_conf.method == AUTH_MD5SIG)
3814 			tcp_md5_add_listener(c, np);
3815 
3816 		memcpy(&p->conf, &np->conf, sizeof(p->conf));
3817 		memcpy(&p->auth_conf, &np->auth_conf, sizeof(p->auth_conf));
3818 		RB_REMOVE(peer_head, &nc->peers, np);
3819 		free(np);
3820 
3821 		p->reconf_action = RECONF_KEEP;
3822 
3823 		/* had demotion, is demoted, demote removed? */
3824 		if (p->demoted && !p->conf.demote_group[0])
3825 			session_demote(p, -1);
3826 
3827 		/* if session is not open then refresh pfkey data */
3828 		if (p->state < STATE_OPENSENT && !p->template)
3829 			imsg_compose(ibuf_main, IMSG_PFKEY_RELOAD,
3830 			    p->conf.id, 0, -1, NULL, 0);
3831 
3832 		/*
3833 		 * If the session is established or the SessionDown timer is
3834 		 * running sync with the RDE
3835 		 */
3836 		if (p->rdesession) {
3837 			if (imsg_rde(IMSG_SESSION_ADD, p->conf.id,
3838 			    &p->conf, sizeof(struct peer_config)) == -1)
3839 				fatalx("imsg_compose error");
3840 		}
3841 
3842 		/* apply the config to all clones of a template */
3843 		if (p->conf.template) {
3844 			struct peer *xp;
3845 			RB_FOREACH(xp, peer_head, &c->peers) {
3846 				if (xp->template != p)
3847 					continue;
3848 				session_template_clone(xp, NULL, xp->conf.id,
3849 				    xp->conf.remote_as);
3850 
3851 				if (p->rdesession) {
3852 					if (imsg_rde(IMSG_SESSION_ADD,
3853 					    xp->conf.id, &xp->conf,
3854 					    sizeof(xp->conf)) == -1)
3855 						fatalx("imsg_compose error");
3856 				}
3857 			}
3858 		}
3859 	}
3860 
3861 	if (imsg_rde(IMSG_RECONF_DRAIN, 0, NULL, 0) == -1)
3862 		fatalx("imsg_compose error");
3863 
3864 	/* pfkeys of new peers already loaded by the parent process */
3865 	RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
3866 		RB_REMOVE(peer_head, &nc->peers, np);
3867 		if (RB_INSERT(peer_head, &c->peers, np) != NULL)
3868 			fatalx("%s: peer tree is corrupt", __func__);
3869 		if (np->auth_conf.method == AUTH_MD5SIG)
3870 			tcp_md5_add_listener(c, np);
3871 	}
3872 }
3873