1 /*
2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5 * Copyright (c) 2005 Intel Corporation. All rights reserved.
6 *
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
12 *
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
15 * conditions are met:
16 *
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer.
20 *
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 * SOFTWARE.
34 */
35
36 #include <linux/mutex.h>
37 #include <linux/inetdevice.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/module.h>
41 #include <linux/notifier.h>
42 #include <net/route.h>
43 #include <net/netevent.h>
44 #include <rdma/ib_addr.h>
45 #include <netinet/if_ether.h>
46
47
48 MODULE_AUTHOR("Sean Hefty");
49 MODULE_DESCRIPTION("IB Address Translation");
50 MODULE_LICENSE("Dual BSD/GPL");
51
52 struct addr_req {
53 struct list_head list;
54 struct sockaddr_storage src_addr;
55 struct sockaddr_storage dst_addr;
56 struct rdma_dev_addr *addr;
57 struct rdma_addr_client *client;
58 void *context;
59 void (*callback)(int status, struct sockaddr *src_addr,
60 struct rdma_dev_addr *addr, void *context);
61 unsigned long timeout;
62 int status;
63 };
64
65 static void process_req(struct work_struct *work);
66
67 static DEFINE_MUTEX(lock);
68 static LIST_HEAD(req_list);
69 static struct delayed_work work;
70 static struct workqueue_struct *addr_wq;
71
72 static struct rdma_addr_client self;
rdma_addr_register_client(struct rdma_addr_client * client)73 void rdma_addr_register_client(struct rdma_addr_client *client)
74 {
75 atomic_set(&client->refcount, 1);
76 init_completion(&client->comp);
77 }
78 EXPORT_SYMBOL(rdma_addr_register_client);
79
put_client(struct rdma_addr_client * client)80 static inline void put_client(struct rdma_addr_client *client)
81 {
82 if (atomic_dec_and_test(&client->refcount))
83 complete(&client->comp);
84 }
85
rdma_addr_unregister_client(struct rdma_addr_client * client)86 void rdma_addr_unregister_client(struct rdma_addr_client *client)
87 {
88 put_client(client);
89 wait_for_completion(&client->comp);
90 }
91 EXPORT_SYMBOL(rdma_addr_unregister_client);
92
rdma_copy_addr(struct rdma_dev_addr * dev_addr,struct ifnet * dev,const unsigned char * dst_dev_addr)93 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
94 const unsigned char *dst_dev_addr)
95 {
96 if (dev->if_type == IFT_INFINIBAND)
97 dev_addr->dev_type = ARPHRD_INFINIBAND;
98 else if (dev->if_type == IFT_ETHER)
99 dev_addr->dev_type = ARPHRD_ETHER;
100 else
101 dev_addr->dev_type = 0;
102 memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
103 memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
104 dev->if_addrlen);
105 if (dst_dev_addr)
106 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
107 dev_addr->bound_dev_if = dev->if_index;
108 return 0;
109 }
110 EXPORT_SYMBOL(rdma_copy_addr);
111
/*
 * Translate a local IP address into device information.
 *
 * addr:     local address to look up (AF_INET, and AF_INET6 when INET6 is
 *           defined).
 * dev_addr: filled in via rdma_copy_addr().  When dev_addr->bound_dev_if is
 *           already non-zero the interface is looked up by that index and
 *           'addr' is not consulted.
 * vlan_id:  optional; receives rdma_vlan_dev_vlan_id() of the interface on
 *           the address-lookup paths (left untouched on the bound-index path).
 *
 * Returns the result of rdma_copy_addr(), -ENODEV when the bound index or the
 * IPv6 address has no interface, and -EADDRNOTAVAIL when the IPv4 address is
 * not local or the address family is not handled.
 */
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
		      u16 *vlan_id)
{
	struct net_device *ndev;
	int rc;

	/* Already bound to an interface: resolve by index only. */
	if (dev_addr->bound_dev_if != 0) {
		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
		if (ndev == NULL)
			return -ENODEV;
		rc = rdma_copy_addr(dev_addr, ndev, NULL);
		dev_put(ndev);
		return rc;
	}

	if (addr->sa_family == AF_INET) {
		struct sockaddr_in *sin4 = (struct sockaddr_in *)addr;

		ndev = ip_dev_find(&init_net, sin4->sin_addr.s_addr);
		if (ndev == NULL)
			return -EADDRNOTAVAIL;

		rc = rdma_copy_addr(dev_addr, ndev, NULL);
		if (vlan_id != NULL)
			*vlan_id = rdma_vlan_dev_vlan_id(ndev);
		dev_put(ndev);
		return rc;
	}

#if defined(INET6)
	if (addr->sa_family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
		struct ifaddr *ifa;
		in_port_t saved_port;

		/* Look the address up with the port cleared, then restore it. */
		saved_port = sin6->sin6_port;
		sin6->sin6_port = 0;
		ifa = ifa_ifwithaddr(addr);
		sin6->sin6_port = saved_port;
		if (ifa == NULL)
			return -ENODEV;

		rc = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
		if (vlan_id != NULL)
			*vlan_id = rdma_vlan_dev_vlan_id(ifa->ifa_ifp);
		ifa_free(ifa);
		return rc;
	}
#endif

	return -EADDRNOTAVAIL;
}
EXPORT_SYMBOL(rdma_translate_ip);
168
set_timeout(unsigned long time)169 static void set_timeout(unsigned long time)
170 {
171 unsigned long delay;
172
173 delay = time - jiffies;
174 if ((long)delay <= 0)
175 delay = 1;
176
177 mod_delayed_work(addr_wq, &work, delay);
178 }
179
queue_req(struct addr_req * req)180 static void queue_req(struct addr_req *req)
181 {
182 struct addr_req *temp_req;
183
184 mutex_lock(&lock);
185 list_for_each_entry_reverse(temp_req, &req_list, list) {
186 if (time_after_eq(req->timeout, temp_req->timeout))
187 break;
188 }
189
190 list_add(&req->list, &temp_req->list);
191
192 if (req_list.next == &req->list)
193 set_timeout(req->timeout);
194 mutex_unlock(&lock);
195 }
196
addr_resolve(struct sockaddr * src_in,struct sockaddr * dst_in,struct rdma_dev_addr * addr)197 static int addr_resolve(struct sockaddr *src_in,
198 struct sockaddr *dst_in,
199 struct rdma_dev_addr *addr)
200 {
201 struct sockaddr_in *sin;
202 struct sockaddr_in6 *sin6;
203 struct ifaddr *ifa;
204 struct ifnet *ifp;
205 struct rtentry *rte;
206 in_port_t port;
207 u_char edst[MAX_ADDR_LEN];
208 int multi;
209 int bcast;
210 int error = 0;
211 /*
212 * Determine whether the address is unicast, multicast, or broadcast
213 * and whether the source interface is valid.
214 */
215 multi = 0;
216 bcast = 0;
217 sin = NULL;
218 sin6 = NULL;
219 ifp = NULL;
220 rte = NULL;
221 switch (dst_in->sa_family) {
222 #ifdef INET
223 case AF_INET:
224 sin = (struct sockaddr_in *)dst_in;
225 if (sin->sin_addr.s_addr == INADDR_BROADCAST)
226 bcast = 1;
227 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
228 multi = 1;
229 sin = (struct sockaddr_in *)src_in;
230 if (sin->sin_addr.s_addr != INADDR_ANY) {
231 /*
232 * Address comparison fails if the port is set
233 * cache it here to be restored later.
234 */
235 port = sin->sin_port;
236 sin->sin_port = 0;
237 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
238 }
239 break;
240 #endif
241 #ifdef INET6
242 case AF_INET6:
243 sin6 = (struct sockaddr_in6 *)dst_in;
244 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
245 multi = 1;
246 sin6 = (struct sockaddr_in6 *)src_in;
247 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
248 port = sin6->sin6_port;
249 sin6->sin6_port = 0;
250 } else
251 src_in = NULL;
252 break;
253 #endif
254 default:
255 return -EINVAL;
256 }
257 /*
258 * If we have a source address to use look it up first and verify
259 * that it is a local interface.
260 */
261 if (sin->sin_addr.s_addr != INADDR_ANY) {
262 ifa = ifa_ifwithaddr(src_in);
263 if (sin)
264 sin->sin_port = port;
265 if (sin6)
266 sin6->sin6_port = port;
267 if (ifa == NULL)
268 return -ENETUNREACH;
269 ifp = ifa->ifa_ifp;
270 ifa_free(ifa);
271 if (bcast || multi)
272 goto mcast;
273 }
274 /*
275 * Make sure the route exists and has a valid link.
276 */
277 rte = rtalloc1(dst_in, 1, 0);
278 if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
279 if (rte)
280 RTFREE_LOCKED(rte);
281 return -EHOSTUNREACH;
282 }
283 /*
284 * If it's not multicast or broadcast and the route doesn't match the
285 * requested interface return unreachable. Otherwise fetch the
286 * correct interface pointer and unlock the route.
287 */
288 if (multi || bcast) {
289 if (ifp == NULL) {
290 ifp = rte->rt_ifp;
291 /* rt_ifa holds the route answer source address */
292 ifa = rte->rt_ifa;
293 }
294 RTFREE_LOCKED(rte);
295 } else if (ifp && ifp != rte->rt_ifp) {
296 RTFREE_LOCKED(rte);
297 return -ENETUNREACH;
298 } else {
299 if (ifp == NULL) {
300 ifp = rte->rt_ifp;
301 ifa = rte->rt_ifa;
302 }
303 RT_UNLOCK(rte);
304 }
305 mcast:
306 if (bcast)
307 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
308 if (multi) {
309 struct sockaddr *llsa;
310
311 error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
312 if (error)
313 return -error;
314 error = rdma_copy_addr(addr, ifp,
315 LLADDR((struct sockaddr_dl *)llsa));
316 free(llsa, M_IFMADDR);
317 if (error == 0)
318 memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
319 return error;
320 }
321 /*
322 * Resolve the link local address.
323 */
324 switch (dst_in->sa_family) {
325 case AF_INET:
326 error = arpresolve_addr(ifp, 0, dst_in, edst, NULL);
327 break;
328 case AF_INET6:
329 error = nd6_resolve_addr(ifp, 0, dst_in, edst, NULL);
330 break;
331 default:
332 /* XXX: Shouldn't happen. */
333 error = -EINVAL;
334 }
335 if (error == EHOSTDOWN && (rte->rt_flags & RTF_GATEWAY))
336 error = EHOSTUNREACH;
337 RTFREE(rte);
338 if (error == 0) {
339 memcpy(src_in, ifa->ifa_addr, ip_addr_size(ifa->ifa_addr));
340 return rdma_copy_addr(addr, ifp, edst);
341 }
342 if (error == EWOULDBLOCK)
343 return -ENODATA;
344 return -error;
345 }
346
process_req(struct work_struct * work)347 static void process_req(struct work_struct *work)
348 {
349 struct addr_req *req, *temp_req;
350 struct sockaddr *src_in, *dst_in;
351 struct list_head done_list;
352
353 INIT_LIST_HEAD(&done_list);
354
355 mutex_lock(&lock);
356 list_for_each_entry_safe(req, temp_req, &req_list, list) {
357 if (req->status == -ENODATA) {
358 src_in = (struct sockaddr *) &req->src_addr;
359 dst_in = (struct sockaddr *) &req->dst_addr;
360 req->status = addr_resolve(src_in, dst_in, req->addr);
361 if (req->status && time_after_eq(jiffies, req->timeout))
362 req->status = -ETIMEDOUT;
363 else if (req->status == -ENODATA)
364 continue;
365 }
366 list_move_tail(&req->list, &done_list);
367 }
368
369 if (!list_empty(&req_list)) {
370 req = list_entry(req_list.next, struct addr_req, list);
371 set_timeout(req->timeout);
372 }
373 mutex_unlock(&lock);
374
375 list_for_each_entry_safe(req, temp_req, &done_list, list) {
376 list_del(&req->list);
377 req->callback(req->status, (struct sockaddr *) &req->src_addr,
378 req->addr, req->context);
379 put_client(req->client);
380 kfree(req);
381 }
382 }
383
rdma_resolve_ip(struct rdma_addr_client * client,struct sockaddr * src_addr,struct sockaddr * dst_addr,struct rdma_dev_addr * addr,int timeout_ms,void (* callback)(int status,struct sockaddr * src_addr,struct rdma_dev_addr * addr,void * context),void * context)384 int rdma_resolve_ip(struct rdma_addr_client *client,
385 struct sockaddr *src_addr, struct sockaddr *dst_addr,
386 struct rdma_dev_addr *addr, int timeout_ms,
387 void (*callback)(int status, struct sockaddr *src_addr,
388 struct rdma_dev_addr *addr, void *context),
389 void *context)
390 {
391 struct sockaddr *src_in, *dst_in;
392 struct addr_req *req;
393 int ret = 0;
394
395 req = kzalloc(sizeof *req, GFP_KERNEL);
396 if (!req)
397 return -ENOMEM;
398
399 src_in = (struct sockaddr *) &req->src_addr;
400 dst_in = (struct sockaddr *) &req->dst_addr;
401
402 if (src_addr) {
403 if (src_addr->sa_family != dst_addr->sa_family) {
404 ret = -EINVAL;
405 goto err;
406 }
407
408 memcpy(src_in, src_addr, ip_addr_size(src_addr));
409 } else {
410 src_in->sa_family = dst_addr->sa_family;
411 }
412
413 memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
414 req->addr = addr;
415 req->callback = callback;
416 req->context = context;
417 req->client = client;
418 atomic_inc(&client->refcount);
419
420 req->status = addr_resolve(src_in, dst_in, addr);
421 switch (req->status) {
422 case 0:
423 req->timeout = jiffies;
424 queue_req(req);
425 break;
426 case -ENODATA:
427 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
428 queue_req(req);
429 break;
430 default:
431 ret = req->status;
432 atomic_dec(&client->refcount);
433 goto err;
434 }
435 return ret;
436 err:
437 kfree(req);
438 return ret;
439 }
440 EXPORT_SYMBOL(rdma_resolve_ip);
441
rdma_addr_cancel(struct rdma_dev_addr * addr)442 void rdma_addr_cancel(struct rdma_dev_addr *addr)
443 {
444 struct addr_req *req, *temp_req;
445
446 mutex_lock(&lock);
447 list_for_each_entry_safe(req, temp_req, &req_list, list) {
448 if (req->addr == addr) {
449 req->status = -ECANCELED;
450 req->timeout = jiffies;
451 list_move(&req->list, &req_list);
452 set_timeout(req->timeout);
453 break;
454 }
455 }
456 mutex_unlock(&lock);
457 }
458 EXPORT_SYMBOL(rdma_addr_cancel);
459
460 struct resolve_cb_context {
461 struct rdma_dev_addr *addr;
462 struct completion comp;
463 };
464
resolve_cb(int status,struct sockaddr * src_addr,struct rdma_dev_addr * addr,void * context)465 static void resolve_cb(int status, struct sockaddr *src_addr,
466 struct rdma_dev_addr *addr, void *context)
467 {
468 memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
469 rdma_dev_addr));
470 complete(&((struct resolve_cb_context *)context)->comp);
471 }
472
rdma_addr_find_dmac_by_grh(union ib_gid * sgid,union ib_gid * dgid,u8 * dmac,u16 * vlan_id)473 int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
474 u16 *vlan_id)
475 {
476 int ret = 0;
477 struct rdma_dev_addr dev_addr;
478 struct resolve_cb_context ctx;
479 struct net_device *dev;
480
481 union {
482 struct sockaddr _sockaddr;
483 struct sockaddr_in _sockaddr_in;
484 struct sockaddr_in6 _sockaddr_in6;
485 } sgid_addr, dgid_addr;
486
487
488 ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
489 if (ret)
490 return ret;
491
492 ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
493 if (ret)
494 return ret;
495
496 memset(&dev_addr, 0, sizeof(dev_addr));
497
498 ctx.addr = &dev_addr;
499 init_completion(&ctx.comp);
500 ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
501 &dev_addr, 1000, resolve_cb, &ctx);
502 if (ret)
503 return ret;
504
505 wait_for_completion(&ctx.comp);
506
507 memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
508 dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
509 if (!dev)
510 return -ENODEV;
511 if (vlan_id)
512 *vlan_id = rdma_vlan_dev_vlan_id(dev);
513 dev_put(dev);
514 return ret;
515 }
516 EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
517
/*
 * Look up the source MAC (and optionally the VLAN id) of the local interface
 * that owns the address encoded in 'sgid'.
 *
 * sgid:    source GID, converted with rdma_gid2ip().
 * smac:    receives ETH_ALEN bytes of the interface's link-layer address.
 * vlan_id: optional; passed through to rdma_translate_ip().
 *
 * Returns 0 on success or the error from rdma_gid2ip() /
 * rdma_translate_ip(); *smac is not written on error.
 */
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
	struct rdma_dev_addr dev_addr;
	union {
		struct sockaddr		_sockaddr;
		struct sockaddr_in	_sockaddr_in;
		struct sockaddr_in6	_sockaddr_in6;
	} gid_addr;
	int rc;

	rc = rdma_gid2ip(&gid_addr._sockaddr, sgid);
	if (rc != 0)
		return rc;

	/* Zeroed record: bound_dev_if == 0 forces a lookup by address. */
	memset(&dev_addr, 0, sizeof(dev_addr));
	rc = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
	if (rc != 0)
		return rc;

	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
	return rc;
}
EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
541
netevent_callback(struct notifier_block * self,unsigned long event,void * ctx)542 static int netevent_callback(struct notifier_block *self, unsigned long event,
543 void *ctx)
544 {
545 if (event == NETEVENT_NEIGH_UPDATE) {
546 set_timeout(jiffies);
547 }
548 return 0;
549 }
550
551 static struct notifier_block nb = {
552 .notifier_call = netevent_callback
553 };
554
addr_init(void)555 static int __init addr_init(void)
556 {
557 INIT_DELAYED_WORK(&work, process_req);
558 addr_wq = create_singlethread_workqueue("ib_addr");
559 if (!addr_wq)
560 return -ENOMEM;
561
562 register_netevent_notifier(&nb);
563 rdma_addr_register_client(&self);
564 return 0;
565 }
566
addr_cleanup(void)567 static void __exit addr_cleanup(void)
568 {
569 rdma_addr_unregister_client(&self);
570 unregister_netevent_notifier(&nb);
571 destroy_workqueue(addr_wq);
572 }
573
574 module_init(addr_init);
575 module_exit(addr_cleanup);
576