1 /*	$OpenBSD: radix_mpath.c,v 1.1 2004/04/25 02:48:03 itojun Exp $	*/
2 /*	$KAME: radix_mpath.c,v 1.13 2002/10/28 21:05:59 itojun Exp $	*/
3 
4 /*
5  * Copyright (C) 2001 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
32  * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
33  * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
34  * PROPERTIES.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/malloc.h>
40 #include <sys/socket.h>
41 #define	M_DONTWAIT M_NOWAIT
42 #include <sys/domain.h>
43 #include <sys/syslog.h>
44 #include <net/radix.h>
45 #include <net/radix_mpath.h>
46 #include <net/route.h>
47 #include <dev/rndvar.h>
48 
49 /*
50  * give some jitter to hash, to avoid synchronization between routers
51  */
52 static u_int32_t hashjitter;
53 
54 int
rn_mpath_capable(rnh)55 rn_mpath_capable(rnh)
56 	struct radix_node_head *rnh;
57 {
58 
59 	return rnh->rnh_multipath;
60 }
61 
62 struct radix_node *
rn_mpath_next(rn)63 rn_mpath_next(rn)
64 	struct radix_node *rn;
65 {
66 	struct radix_node *next;
67 
68 	if (!rn->rn_dupedkey)
69 		return NULL;
70 	next = rn->rn_dupedkey;
71 	if (rn->rn_mask == next->rn_mask)
72 		return next;
73 	else
74 		return NULL;
75 }
76 
77 int
rn_mpath_count(rn)78 rn_mpath_count(rn)
79 	struct radix_node *rn;
80 {
81 	int i;
82 
83 	i = 1;
84 	while ((rn = rn_mpath_next(rn)) != NULL)
85 		i++;
86 	return i;
87 }
88 
89 struct rtentry *
rt_mpath_matchgate(rt,gate)90 rt_mpath_matchgate(rt, gate)
91 	struct rtentry *rt;
92 	struct sockaddr *gate;
93 {
94 	struct radix_node *rn;
95 
96 	if (!rn_mpath_next((struct radix_node *)rt))
97 		return rt;
98 
99 	if (!gate)
100 		return NULL;
101 	/* beyond here, we use rn as the master copy */
102 	rn = (struct radix_node *)rt;
103 	do {
104 		rt = (struct rtentry *)rn;
105 		if (rt->rt_gateway->sa_len == gate->sa_len &&
106 		    !memcmp(rt->rt_gateway, gate, gate->sa_len))
107 			break;
108 	} while ((rn = rn_mpath_next(rn)) != NULL);
109 	if (!rn)
110 		return NULL;
111 
112 	return (struct rtentry *)rn;
113 }
114 
115 /*
116  * check if we have the same key/mask/gateway on the table already.
117  */
118 int
rt_mpath_conflict(rnh,rt,netmask)119 rt_mpath_conflict(rnh, rt, netmask)
120 	struct radix_node_head *rnh;
121 	struct rtentry *rt;
122 	struct sockaddr *netmask;
123 {
124 	struct radix_node *rn, *rn1;
125 	struct rtentry *rt1;
126 	char *p, *q, *eq;
127 	int same, l, skip;
128 
129 	rn = (struct radix_node *)rt;
130 	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
131 	if (!rn1 || rn1->rn_flags & RNF_ROOT)
132 		return 0;
133 
134 	/*
135 	 * unlike other functions we have in this file, we have to check
136 	 * all key/mask/gateway as rnh_lookup can match less specific entry.
137 	 */
138 	rt1 = (struct rtentry *)rn1;
139 
140 	/* compare key. */
141 	if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
142 	    bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
143 		goto different;
144 
145 	/* key was the same.  compare netmask.  hairy... */
146 	if (rt_mask(rt1) && netmask) {
147 		skip = rnh->rnh_treetop->rn_off;
148 		if (rt_mask(rt1)->sa_len > netmask->sa_len) {
149 			/*
150 			 * as rt_mask(rt1) is made optimal by radix.c,
151 			 * there must be some 1-bits on rt_mask(rt1)
152 			 * after netmask->sa_len.  therefore, in
153 			 * this case, the entries are different.
154 			 */
155 			if (rt_mask(rt1)->sa_len > skip)
156 				goto different;
157 			else {
158 				/* no bits to compare, i.e. same*/
159 				goto maskmatched;
160 			}
161 		}
162 
163 		l = rt_mask(rt1)->sa_len;
164 		if (skip > l) {
165 			/* no bits to compare, i.e. same */
166 			goto maskmatched;
167 		}
168 		p = (char *)rt_mask(rt1);
169 		q = (char *)netmask;
170 		if (bcmp(p + skip, q + skip, l - skip))
171 			goto different;
172 		/*
173 		 * need to go through all the bit, as netmask is not
174 		 * optimal and can contain trailing 0s
175 		 */
176 		eq = (char *)netmask + netmask->sa_len;
177 		q += l;
178 		same = 1;
179 		while (eq > q)
180 			if (*q++) {
181 				same = 0;
182 				break;
183 			}
184 		if (!same)
185 			goto different;
186 	} else if (!rt_mask(rt1) && !netmask)
187 		; /* no mask to compare, i.e. same */
188 	else {
189 		/* one has mask and the other does not, different */
190 		goto different;
191 	}
192 
193  maskmatched:;
194 
195 	/* key/mask were the same.  compare gateway for all multipaths */
196 	do {
197 		rt1 = (struct rtentry *)rn1;
198 
199 		/* sanity: no use in comparing the same thing */
200 		if (rn1 == rn)
201 			continue;
202 
203 		if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
204 		    bcmp(rt1->rt_gateway, rt->rt_gateway,
205 		    rt1->rt_gateway->sa_len))
206 			continue;
207 
208 		/* all key/mask/gateway are the same.  conflicting entry. */
209 		return EEXIST;
210 	} while ((rn1 = rn_mpath_next(rn1)) != NULL);
211 
212  different:
213 	return 0;
214 }
215 
216 void
rtalloc_mpath(ro,hash)217 rtalloc_mpath(ro, hash)
218 	struct route *ro;
219 	int hash;
220 {
221 	struct radix_node *rn0, *rn;
222 	int n;
223 
224 	/*
225 	 * XXX we don't attempt to lookup cached route again; what should
226 	 * be done for sendto(3) case?
227 	 */
228 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
229 		return;				 /* XXX */
230 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1);
231 	/* if the route does not exist or it is not multipath, don't care */
232 	if (!ro->ro_rt || !rn_mpath_next((struct radix_node *)ro->ro_rt))
233 		return;
234 
235 	/* beyond here, we use rn as the master copy */
236 	rn0 = rn = (struct radix_node *)ro->ro_rt;
237 	n = rn_mpath_count(rn0);
238 
239 	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
240 	hash += hashjitter;
241 	hash %= n;
242 	while (hash-- > 0 && rn) {
243 		/* stay within the multipath routes */
244 		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
245 			break;
246 		rn = rn->rn_dupedkey;
247 	}
248 
249 	/* XXX try filling rt_gwroute and avoid unreachable gw  */
250 
251 	/* if gw selection fails, use the first match (default) */
252 	if (!rn)
253 		return;
254 
255 	rtfree(ro->ro_rt);
256 	ro->ro_rt = (struct rtentry *)rn;
257 	ro->ro_rt->rt_refcnt++;
258 }
259 
260 int
rn_mpath_inithead(head,off)261 rn_mpath_inithead(head, off)
262 	void **head;
263 	int off;
264 {
265 	struct radix_node_head *rnh;
266 
267 	hashjitter = arc4random();
268 	if (rn_inithead(head, off) == 1) {
269 		rnh = (struct radix_node_head *)*head;
270 		rnh->rnh_multipath = 1;
271 		return 1;
272 	} else
273 		return 0;
274 }
275