xref: /NextBSD/sys/ofed/drivers/infiniband/core/addr.c (revision 84d351007654069f9643c8e4b4802a7f5f08ee42)
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/mutex.h>
37 #include <linux/inetdevice.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/module.h>
41 #include <linux/notifier.h>
42 #include <net/route.h>
43 #include <net/netevent.h>
44 #include <rdma/ib_addr.h>
45 #include <netinet/if_ether.h>
46 
47 
48 MODULE_AUTHOR("Sean Hefty");
49 MODULE_DESCRIPTION("IB Address Translation");
50 MODULE_LICENSE("Dual BSD/GPL");
51 
/*
 * One address-resolution request.  Allocated by rdma_resolve_ip(), linked
 * on req_list by queue_req() and completed and freed by process_req().
 */
52 struct addr_req {
53 	struct list_head list;	/* linkage on req_list, then on process_req()'s done list */
54 	struct sockaddr_storage src_addr;	/* private copy of the source address */
55 	struct sockaddr_storage dst_addr;	/* private copy of the destination address */
56 	struct rdma_dev_addr *addr;	/* caller's buffer, filled in by addr_resolve() */
57 	struct rdma_addr_client *client;	/* client whose refcount this request holds */
58 	void *context;	/* opaque value passed back to callback */
	/* Invoked from process_req() with the final status of the request. */
59 	void (*callback)(int status, struct sockaddr *src_addr,
60 			 struct rdma_dev_addr *addr, void *context);
61 	unsigned long timeout;	/* deadline, in jiffies */
62 	int status;	/* -ENODATA while resolution is still pending */
63 };
64 
/* Work handler for 'work'; retries pending requests and completes finished ones. */
65 static void process_req(struct work_struct *work);
66 
/* 'lock' is held around every access to req_list in this file. */
67 static DEFINE_MUTEX(lock);
/* Outstanding requests; queue_req() inserts by timeout. */
68 static LIST_HEAD(req_list);
/* Delayed work item (re)armed by set_timeout(). */
69 static struct delayed_work work;
/* Workqueue on which 'work' runs; created in addr_init(). */
70 static struct workqueue_struct *addr_wq;
71 
/* Client used for the lookups issued by rdma_addr_find_dmac_by_grh(). */
72 static struct rdma_addr_client self;
rdma_addr_register_client(struct rdma_addr_client * client)73 void rdma_addr_register_client(struct rdma_addr_client *client)
74 {
75 	atomic_set(&client->refcount, 1);
76 	init_completion(&client->comp);
77 }
78 EXPORT_SYMBOL(rdma_addr_register_client);
79 
put_client(struct rdma_addr_client * client)80 static inline void put_client(struct rdma_addr_client *client)
81 {
82 	if (atomic_dec_and_test(&client->refcount))
83 		complete(&client->comp);
84 }
85 
rdma_addr_unregister_client(struct rdma_addr_client * client)86 void rdma_addr_unregister_client(struct rdma_addr_client *client)
87 {
88 	put_client(client);
89 	wait_for_completion(&client->comp);
90 }
91 EXPORT_SYMBOL(rdma_addr_unregister_client);
92 
rdma_copy_addr(struct rdma_dev_addr * dev_addr,struct ifnet * dev,const unsigned char * dst_dev_addr)93 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
94 		     const unsigned char *dst_dev_addr)
95 {
96 	if (dev->if_type == IFT_INFINIBAND)
97 		dev_addr->dev_type = ARPHRD_INFINIBAND;
98 	else if (dev->if_type == IFT_ETHER)
99 		dev_addr->dev_type = ARPHRD_ETHER;
100 	else
101 		dev_addr->dev_type = 0;
102 	memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
103 	memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
104 	    dev->if_addrlen);
105 	if (dst_dev_addr)
106 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
107 	dev_addr->bound_dev_if = dev->if_index;
108 	return 0;
109 }
110 EXPORT_SYMBOL(rdma_copy_addr);
111 
/*
 * Translate a local IP address into the link-level description of the
 * interface that owns it.
 *
 * If dev_addr->bound_dev_if is already set, that interface is used and
 * addr is not consulted (vlan_id is left untouched in that case).
 * Otherwise the interface is looked up by address; on a match dev_addr is
 * filled in through rdma_copy_addr() and, when vlan_id is not NULL, the
 * interface's VLAN id is stored there.
 *
 * Returns 0 on success, -ENODEV when the bound interface (or the IPv6
 * address) cannot be found, and -EADDRNOTAVAIL when an IPv4 address is not
 * local or the address family is not handled.
 */
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
		      u16 *vlan_id)
{
	struct net_device *ndev;
	int rc;

	if (dev_addr->bound_dev_if) {
		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
		if (ndev == NULL)
			return -ENODEV;
		rc = rdma_copy_addr(dev_addr, ndev, NULL);
		dev_put(ndev);
		return rc;
	}

	rc = -EADDRNOTAVAIL;
	switch (addr->sa_family) {
	case AF_INET:
		ndev = ip_dev_find(&init_net,
			((struct sockaddr_in *) addr)->sin_addr.s_addr);
		if (ndev == NULL)
			break;

		rc = rdma_copy_addr(dev_addr, ndev, NULL);
		if (vlan_id != NULL)
			*vlan_id = rdma_vlan_dev_vlan_id(ndev);
		dev_put(ndev);
		break;

#if defined(INET6)
	case AF_INET6:
		{
			struct sockaddr_in6 *in6;
			struct ifaddr *match;
			in_port_t saved_port;

			/*
			 * Look the address up with the port cleared, then put
			 * the caller's port back.
			 */
			in6 = (struct sockaddr_in6 *)addr;
			saved_port = in6->sin6_port;
			in6->sin6_port = 0;
			match = ifa_ifwithaddr(addr);
			in6->sin6_port = saved_port;
			if (match == NULL) {
				rc = -ENODEV;
				break;
			}
			rc = rdma_copy_addr(dev_addr, match->ifa_ifp, NULL);
			if (vlan_id != NULL)
				*vlan_id = rdma_vlan_dev_vlan_id(match->ifa_ifp);
			ifa_free(match);
			break;
		}
#endif
	default:
		break;
	}
	return rc;
}
167 EXPORT_SYMBOL(rdma_translate_ip);
168 
set_timeout(unsigned long time)169 static void set_timeout(unsigned long time)
170 {
171 	unsigned long delay;
172 
173 	delay = time - jiffies;
174 	if ((long)delay <= 0)
175 		delay = 1;
176 
177 	mod_delayed_work(addr_wq, &work, delay);
178 }
179 
queue_req(struct addr_req * req)180 static void queue_req(struct addr_req *req)
181 {
182 	struct addr_req *temp_req;
183 
184 	mutex_lock(&lock);
185 	list_for_each_entry_reverse(temp_req, &req_list, list) {
186 		if (time_after_eq(req->timeout, temp_req->timeout))
187 			break;
188 	}
189 
190 	list_add(&req->list, &temp_req->list);
191 
192 	if (req_list.next == &req->list)
193 		set_timeout(req->timeout);
194 	mutex_unlock(&lock);
195 }
196 
addr_resolve(struct sockaddr * src_in,struct sockaddr * dst_in,struct rdma_dev_addr * addr)197 static int addr_resolve(struct sockaddr *src_in,
198 			struct sockaddr *dst_in,
199 			struct rdma_dev_addr *addr)
200 {
201 	struct sockaddr_in *sin;
202 	struct sockaddr_in6 *sin6;
203 	struct ifaddr *ifa;
204 	struct ifnet *ifp;
205 	struct rtentry *rte;
206 	in_port_t port;
207 	u_char edst[MAX_ADDR_LEN];
208 	int multi;
209 	int bcast;
210 	int error = 0;
211 	/*
212 	 * Determine whether the address is unicast, multicast, or broadcast
213 	 * and whether the source interface is valid.
214 	 */
215 	multi = 0;
216 	bcast = 0;
217 	sin = NULL;
218 	sin6 = NULL;
219 	ifp = NULL;
220 	rte = NULL;
221 	switch (dst_in->sa_family) {
222 #ifdef INET
223 	case AF_INET:
224 		sin = (struct sockaddr_in *)dst_in;
225 		if (sin->sin_addr.s_addr == INADDR_BROADCAST)
226 			bcast = 1;
227 		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
228 			multi = 1;
229 		sin = (struct sockaddr_in *)src_in;
230 		if (sin->sin_addr.s_addr != INADDR_ANY) {
231 			/*
232 			 * Address comparison fails if the port is set
233 			 * cache it here to be restored later.
234 			 */
235 			port = sin->sin_port;
236 			sin->sin_port = 0;
237 			memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
238 		}
239 		break;
240 #endif
241 #ifdef INET6
242 	case AF_INET6:
243 		sin6 = (struct sockaddr_in6 *)dst_in;
244 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
245 			multi = 1;
246 		sin6 = (struct sockaddr_in6 *)src_in;
247 		if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
248 			port = sin6->sin6_port;
249 			sin6->sin6_port = 0;
250 		} else
251 			src_in = NULL;
252 		break;
253 #endif
254 	default:
255 		return -EINVAL;
256 	}
257 	/*
258 	 * If we have a source address to use look it up first and verify
259 	 * that it is a local interface.
260 	 */
261 	if (sin->sin_addr.s_addr != INADDR_ANY) {
262 		ifa = ifa_ifwithaddr(src_in);
263 		if (sin)
264 			sin->sin_port = port;
265 		if (sin6)
266 			sin6->sin6_port = port;
267 		if (ifa == NULL)
268 			return -ENETUNREACH;
269 		ifp = ifa->ifa_ifp;
270 		ifa_free(ifa);
271 		if (bcast || multi)
272 			goto mcast;
273 	}
274 	/*
275 	 * Make sure the route exists and has a valid link.
276 	 */
277 	rte = rtalloc1(dst_in, 1, 0);
278 	if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
279 		if (rte)
280 			RTFREE_LOCKED(rte);
281 		return -EHOSTUNREACH;
282 	}
283 	/*
284 	 * If it's not multicast or broadcast and the route doesn't match the
285 	 * requested interface return unreachable.  Otherwise fetch the
286 	 * correct interface pointer and unlock the route.
287 	 */
288 	if (multi || bcast) {
289 		if (ifp == NULL) {
290 			ifp = rte->rt_ifp;
291 			/* rt_ifa holds the route answer source address */
292 			ifa = rte->rt_ifa;
293 		}
294 		RTFREE_LOCKED(rte);
295 	} else if (ifp && ifp != rte->rt_ifp) {
296 		RTFREE_LOCKED(rte);
297 		return -ENETUNREACH;
298 	} else {
299 		if (ifp == NULL) {
300 			ifp = rte->rt_ifp;
301 			ifa = rte->rt_ifa;
302 		}
303 		RT_UNLOCK(rte);
304 	}
305 mcast:
306 	if (bcast)
307 		return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
308 	if (multi) {
309 		struct sockaddr *llsa;
310 
311 		error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
312 		if (error)
313 			return -error;
314 		error = rdma_copy_addr(addr, ifp,
315 		    LLADDR((struct sockaddr_dl *)llsa));
316 		free(llsa, M_IFMADDR);
317 		if (error == 0)
318 			memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
319 		return error;
320 	}
321 	/*
322 	 * Resolve the link local address.
323 	 */
324 	switch (dst_in->sa_family) {
325 	case AF_INET:
326 		error = arpresolve_addr(ifp, 0, dst_in, edst, NULL);
327 		break;
328 	case AF_INET6:
329 		error = nd6_resolve_addr(ifp, 0, dst_in, edst, NULL);
330 		break;
331 	default:
332 		/* XXX: Shouldn't happen. */
333 		error = -EINVAL;
334 	}
335 	if (error == EHOSTDOWN && (rte->rt_flags & RTF_GATEWAY))
336 		error = EHOSTUNREACH;
337 	RTFREE(rte);
338 	if (error == 0) {
339 		memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
340 		return rdma_copy_addr(addr, ifp, edst);
341 	}
342 	if (error == EWOULDBLOCK)
343 		return -ENODATA;
344 	return -error;
345 }
346 
process_req(struct work_struct * work)347 static void process_req(struct work_struct *work)
348 {
349 	struct addr_req *req, *temp_req;
350 	struct sockaddr *src_in, *dst_in;
351 	struct list_head done_list;
352 
353 	INIT_LIST_HEAD(&done_list);
354 
355 	mutex_lock(&lock);
356 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
357 		if (req->status == -ENODATA) {
358 			src_in = (struct sockaddr *) &req->src_addr;
359 			dst_in = (struct sockaddr *) &req->dst_addr;
360 			req->status = addr_resolve(src_in, dst_in, req->addr);
361 			if (req->status && time_after_eq(jiffies, req->timeout))
362 				req->status = -ETIMEDOUT;
363 			else if (req->status == -ENODATA)
364 				continue;
365 		}
366 		list_move_tail(&req->list, &done_list);
367 	}
368 
369 	if (!list_empty(&req_list)) {
370 		req = list_entry(req_list.next, struct addr_req, list);
371 		set_timeout(req->timeout);
372 	}
373 	mutex_unlock(&lock);
374 
375 	list_for_each_entry_safe(req, temp_req, &done_list, list) {
376 		list_del(&req->list);
377 		req->callback(req->status, (struct sockaddr *) &req->src_addr,
378 			req->addr, req->context);
379 		put_client(req->client);
380 		kfree(req);
381 	}
382 }
383 
rdma_resolve_ip(struct rdma_addr_client * client,struct sockaddr * src_addr,struct sockaddr * dst_addr,struct rdma_dev_addr * addr,int timeout_ms,void (* callback)(int status,struct sockaddr * src_addr,struct rdma_dev_addr * addr,void * context),void * context)384 int rdma_resolve_ip(struct rdma_addr_client *client,
385 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
386 		    struct rdma_dev_addr *addr, int timeout_ms,
387 		    void (*callback)(int status, struct sockaddr *src_addr,
388 				     struct rdma_dev_addr *addr, void *context),
389 		    void *context)
390 {
391 	struct sockaddr *src_in, *dst_in;
392 	struct addr_req *req;
393 	int ret = 0;
394 
395 	req = kzalloc(sizeof *req, GFP_KERNEL);
396 	if (!req)
397 		return -ENOMEM;
398 
399 	src_in = (struct sockaddr *) &req->src_addr;
400 	dst_in = (struct sockaddr *) &req->dst_addr;
401 
402 	if (src_addr) {
403 		if (src_addr->sa_family != dst_addr->sa_family) {
404 			ret = -EINVAL;
405 			goto err;
406 		}
407 
408 		memcpy(src_in, src_addr, ip_addr_size(src_addr));
409 	} else {
410 		src_in->sa_family = dst_addr->sa_family;
411 	}
412 
413 	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
414 	req->addr = addr;
415 	req->callback = callback;
416 	req->context = context;
417 	req->client = client;
418 	atomic_inc(&client->refcount);
419 
420 	req->status = addr_resolve(src_in, dst_in, addr);
421 	switch (req->status) {
422 	case 0:
423 		req->timeout = jiffies;
424 		queue_req(req);
425 		break;
426 	case -ENODATA:
427 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
428 		queue_req(req);
429 		break;
430 	default:
431 		ret = req->status;
432 		atomic_dec(&client->refcount);
433 		goto err;
434 	}
435 	return ret;
436 err:
437 	kfree(req);
438 	return ret;
439 }
440 EXPORT_SYMBOL(rdma_resolve_ip);
441 
rdma_addr_cancel(struct rdma_dev_addr * addr)442 void rdma_addr_cancel(struct rdma_dev_addr *addr)
443 {
444 	struct addr_req *req, *temp_req;
445 
446 	mutex_lock(&lock);
447 	list_for_each_entry_safe(req, temp_req, &req_list, list) {
448 		if (req->addr == addr) {
449 			req->status = -ECANCELED;
450 			req->timeout = jiffies;
451 			list_move(&req->list, &req_list);
452 			set_timeout(req->timeout);
453 			break;
454 		}
455 	}
456 	mutex_unlock(&lock);
457 }
458 EXPORT_SYMBOL(rdma_addr_cancel);
459 
460 struct resolve_cb_context {
461 	struct rdma_dev_addr *addr;
462 	struct completion comp;
463 };
464 
resolve_cb(int status,struct sockaddr * src_addr,struct rdma_dev_addr * addr,void * context)465 static void resolve_cb(int status, struct sockaddr *src_addr,
466 	     struct rdma_dev_addr *addr, void *context)
467 {
468 	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
469 				rdma_dev_addr));
470 	complete(&((struct resolve_cb_context *)context)->comp);
471 }
472 
rdma_addr_find_dmac_by_grh(union ib_gid * sgid,union ib_gid * dgid,u8 * dmac,u16 * vlan_id)473 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
474 			       u16 *vlan_id)
475 {
476 	int ret = 0;
477 	struct rdma_dev_addr dev_addr;
478 	struct resolve_cb_context ctx;
479 	struct net_device *dev;
480 
481 	union {
482 		struct sockaddr     _sockaddr;
483 		struct sockaddr_in  _sockaddr_in;
484 		struct sockaddr_in6 _sockaddr_in6;
485 	} sgid_addr, dgid_addr;
486 
487 
488 	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
489 	if (ret)
490 		return ret;
491 
492 	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
493 	if (ret)
494 		return ret;
495 
496 	memset(&dev_addr, 0, sizeof(dev_addr));
497 
498 	ctx.addr = &dev_addr;
499 	init_completion(&ctx.comp);
500 	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
501 			&dev_addr, 1000, resolve_cb, &ctx);
502 	if (ret)
503 		return ret;
504 
505 	wait_for_completion(&ctx.comp);
506 
507 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
508 	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
509 	if (!dev)
510 		return -ENODEV;
511 	if (vlan_id)
512 		*vlan_id = rdma_vlan_dev_vlan_id(dev);
513 	dev_put(dev);
514 	return ret;
515 }
516 EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
517 
/*
 * Look up the source MAC address (and, through rdma_translate_ip(), the
 * VLAN id) of the local interface that owns the IP address encoded in
 * sgid.
 *
 * Returns 0 with ETH_ALEN bytes stored in smac, or the error returned by
 * rdma_gid2ip() or rdma_translate_ip(); smac is not written on error.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	union {
		struct sockaddr     _sockaddr;
		struct sockaddr_in  _sockaddr_in;
		struct sockaddr_in6 _sockaddr_in6;
	} gid_addr;
	struct rdma_dev_addr dev_addr;
	int ret;

	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
	if (ret != 0)
		return ret;

	memset(&dev_addr, 0, sizeof(dev_addr));
	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (ret == 0)
		memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);

	return ret;
}
540 EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
541 
netevent_callback(struct notifier_block * self,unsigned long event,void * ctx)542 static int netevent_callback(struct notifier_block *self, unsigned long event,
543 	void *ctx)
544 {
545 	if (event == NETEVENT_NEIGH_UPDATE) {
546 			set_timeout(jiffies);
547 		}
548 	return 0;
549 }
550 
/*
 * Notifier block registered in addr_init() and removed in addr_cleanup();
 * routes netevents to netevent_callback().
 */
551 static struct notifier_block nb = {
552 	.notifier_call = netevent_callback
553 };
554 
addr_init(void)555 static int __init addr_init(void)
556 {
557 	INIT_DELAYED_WORK(&work, process_req);
558 	addr_wq = create_singlethread_workqueue("ib_addr");
559 	if (!addr_wq)
560 		return -ENOMEM;
561 
562 	register_netevent_notifier(&nb);
563 	rdma_addr_register_client(&self);
564 	return 0;
565 }
566 
addr_cleanup(void)567 static void __exit addr_cleanup(void)
568 {
569 	rdma_addr_unregister_client(&self);
570 	unregister_netevent_notifier(&nb);
571 	destroy_workqueue(addr_wq);
572 }
573 
574 module_init(addr_init);
575 module_exit(addr_cleanup);
576