1 /*
2 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id: cm.c 3453 2005-09-15 21:43:21Z sean.hefty $
33 */
34
35 #if HAVE_CONFIG_H
36 # include <config.h>
37 #endif /* HAVE_CONFIG_H */
38
39 #include <stdlib.h>
40 #include <string.h>
41 #include <glob.h>
42 #include <stdio.h>
43 #include <fcntl.h>
44 #include <errno.h>
45 #include <stdint.h>
46 #include <poll.h>
47 #include <unistd.h>
48 #include <pthread.h>
49 #include <infiniband/endian.h>
50 #include <infiniband/byteswap.h>
51 #include <stddef.h>
52
53 #include <infiniband/driver.h>
54 #include <infiniband/marshall.h>
55 #include <rdma/rdma_cma.h>
56 #include <rdma/rdma_cma_abi.h>
57
58 #ifdef INCLUDE_VALGRIND
59 # include <valgrind/memcheck.h>
60 # ifndef VALGRIND_MAKE_MEM_DEFINED
61 # warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
62 # endif
63 #endif
64
65 #ifndef VALGRIND_MAKE_MEM_DEFINED
66 # define VALGRIND_MAKE_MEM_DEFINED(addr,len)
67 #endif
68
69 #define PFX "librdmacm: "
70
/*
 * 64-bit conversions between host byte order and network (big-endian)
 * byte order.  On a little-endian host the bytes are swapped; on any
 * other host the value is returned unchanged.
 */
static inline uint64_t htonll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}

static inline uint64_t ntohll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}
78
/*
 * Store @err in errno and hand back -1, so a failing path can be written
 * as a single "return ERR(code);" statement.
 */
static inline int ERR(int err)
{
	const int failure = -1;

	errno = err;
	return failure;
}
84
/*
 * Build a ucma command that expects a response.
 *
 * Allocates, with alloca(), one buffer holding a struct ucma_abi_cmd_hdr
 * immediately followed by the command structure, and a second buffer for
 * the response.  On exit:
 *   size - total byte length of header + command (the length to write())
 *   msg  - start of the header/command buffer
 *   cmd  - points just past the header, zero-filled
 *   resp - response buffer; its address is stored in cmd->response
 * The header records the command code (type) and the in/out sizes.
 *
 * Caveats:
 *  - Expands to "return ERR(ENOMEM);" in the *calling* function if either
 *    alloca() yields NULL, so it is only usable in functions returning int.
 *  - The cmd argument must be a variable literally named "cmd": the macro
 *    parameter is also substituted into "hdr->cmd", which has to remain
 *    the header's member name.
 *  - A local named "hdr" is declared inside the expansion; do not pass a
 *    variable of that name.
 *  - "msg + sizeof(*hdr)" is byte arithmetic on msg; the callers in this
 *    file pass a void pointer.
 */
#define CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;         \
					    \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = sizeof(*resp);           \
	memset(cmd, 0, sizeof(*cmd));       \
	resp = alloca(sizeof(*resp));       \
	if (!resp)                          \
		return ERR(ENOMEM);         \
	cmd->response = (uintptr_t)resp;\
} while (0)
104
/*
 * Build a ucma command that carries no response.
 *
 * Allocates, with alloca(), one buffer holding a struct ucma_abi_cmd_hdr
 * immediately followed by the command structure.  On exit:
 *   size - total byte length of header + command (the length to write())
 *   msg  - start of the buffer
 *   cmd  - points just past the header, zero-filled
 * The header records the command code (type), the command size as the
 * input length, and an output length of zero.
 *
 * Caveats:
 *  - Expands to "return ERR(ENOMEM);" in the *calling* function if
 *    alloca() yields NULL, so it is only usable in functions returning int.
 *  - The cmd argument must be a variable literally named "cmd": the macro
 *    parameter is also substituted into "hdr->cmd", which has to remain
 *    the header's member name.
 *  - A local named "hdr" is declared inside the expansion; do not pass a
 *    variable of that name.
 */
#define CMA_CREATE_MSG_CMD(msg, cmd, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;         \
					    \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = 0;                       \
	memset(cmd, 0, sizeof(*cmd));       \
} while (0)
120
/* One opened RDMA device, as recorded in the device table by ucma_init(). */
struct cma_device {
	struct ibv_context *verbs;		/* context returned by ibv_open_device() */
	uint64_t	    guid;		/* ibv_get_device_guid() value; key used by ucma_get_device() */
	int		    port_cnt;		/* phys_port_cnt reported by ibv_query_device() */
	uint8_t		    max_initiator_depth;	/* max_qp_init_rd_atom, narrowed to 8 bits */
	uint8_t		    max_responder_resources;	/* max_qp_rd_atom, narrowed to 8 bits */
};
128
/*
 * Library-private state wrapped around the public rdma_cm_id.  Callers
 * hold a pointer to the embedded 'id'; container_of() recovers this
 * structure from it.
 */
struct cma_id_private {
	struct rdma_cm_id id;		/* public id handed back to the caller */
	struct cma_device *cma_dev;	/* device this id resolved to; set by ucma_get_device() */
	int		  events_completed;	/* count of acked events; rdma_destroy_id() waits on it */
	int		  connect_error;	/* not referenced in the code visible here */
	pthread_cond_t	  cond;		/* signalled whenever events_completed is bumped */
	pthread_mutex_t	  mut;		/* held while updating events_completed and mc_list */
	uint32_t	  handle;	/* id returned by the kernel for UCMA_CMD_CREATE_ID */
	struct cma_multicast *mc_list;	/* head of the singly linked list of multicast joins */
};
139
/* One multicast join on an id; linked through cma_id_private.mc_list. */
struct cma_multicast {
	struct cma_multicast  *next;	/* next join on the same id */
	struct cma_id_private *id_priv;	/* id that owns this join */
	void		*context;	/* caller context given to rdma_join_multicast() */
	int		events_completed;	/* count of acked events; rdma_leave_multicast() waits on it */
	pthread_cond_t	cond;		/* signalled, under id_priv->mut, when events_completed is bumped */
	uint32_t	handle;		/* id returned by the kernel for UCMA_CMD_JOIN_MCAST */
	union ibv_gid	mgid;		/* passed with mlid to ibv_detach_mcast() on leave */
	uint16_t	mlid;
	struct sockaddr_storage addr;	/* address given at join time; matched again on leave */
};
151
/*
 * Wrapper around a public rdma_cm_event.  rdma_ack_cm_event() recovers it
 * from the embedded 'event' with container_of() and frees it.
 */
struct cma_event {
	struct rdma_cm_event	event;		/* public event handed to the caller */
	/* NOTE(review): presumably backing storage for the event's private data; filled outside the code visible here. */
	uint8_t			private_data[RDMA_MAX_PRIVATE_DATA];
	struct cma_id_private	*id_priv;	/* id credited with the ack when mc is NULL */
	struct cma_multicast	*mc;		/* when non-NULL, the multicast join credited with the ack */
};
158
/* Table of opened RDMA devices; allocated and filled by ucma_init(). */
static struct cma_device *cma_dev_array;
/* Number of valid entries in cma_dev_array; stays zero until ucma_init() succeeds. */
static int cma_dev_cnt;
/* Held by ucma_init() while the device table is being built. */
static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
/* Kernel ABI version in use; overwritten by check_abi_version() when sysfs reports one. */
static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;
163
/*
 * Given a pointer to the member @field of a @type object, return a
 * pointer to the enclosing object.
 *
 * @ptr is parenthesized so that an expression argument (for example a
 * conditional) is cast as a whole, and the subtraction is done on a
 * char pointer so it is plain byte arithmetic in standard C.
 */
#define container_of(ptr, type, field) \
	((type *) ((char *)(ptr) - offsetof(type, field)))
166
ucma_cleanup(void)167 static void ucma_cleanup(void)
168 {
169 if (cma_dev_cnt) {
170 while (cma_dev_cnt)
171 ibv_close_device(cma_dev_array[--cma_dev_cnt].verbs);
172
173 free(cma_dev_array);
174 cma_dev_cnt = 0;
175 }
176 }
177
check_abi_version(void)178 static int check_abi_version(void)
179 {
180 char value[8];
181
182 if ((ibv_read_sysfs_file(ibv_get_sysfs_path(),
183 "class/misc/rdma_cm/abi_version",
184 value, sizeof value) < 0) &&
185 (ibv_read_sysfs_file(ibv_get_sysfs_path(),
186 "class/infiniband_ucma/abi_version",
187 value, sizeof value) < 0)) {
188 /*
189 * Older version of Linux do not have class/misc. To support
190 * backports, assume the most recent version of the ABI. If
191 * we're wrong, we'll simply fail later when calling the ABI.
192 */
193 fprintf(stderr, "librdmacm: couldn't read ABI version.\n");
194 fprintf(stderr, "librdmacm: assuming: %d\n", abi_ver);
195 return 0;
196 }
197
198 abi_ver = strtol(value, NULL, 10);
199 if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
200 abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
201 fprintf(stderr, "librdmacm: kernel ABI version %d "
202 "doesn't match library version %d.\n",
203 abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
204 return -1;
205 }
206 return 0;
207 }
208
ucma_init(void)209 static int ucma_init(void)
210 {
211 struct ibv_device **dev_list = NULL;
212 struct cma_device *cma_dev;
213 struct ibv_device_attr attr;
214 int i, ret, dev_cnt;
215
216 pthread_mutex_lock(&mut);
217 if (cma_dev_cnt) {
218 pthread_mutex_unlock(&mut);
219 return 0;
220 }
221
222 ret = check_abi_version();
223 if (ret)
224 goto err1;
225
226 dev_list = ibv_get_device_list(&dev_cnt);
227 if (!dev_list) {
228 printf("CMA: unable to get RDMA device list\n");
229 ret = ERR(ENODEV);
230 goto err1;
231 }
232
233 cma_dev_array = malloc(sizeof *cma_dev * dev_cnt);
234 if (!cma_dev_array) {
235 ret = ERR(ENOMEM);
236 goto err2;
237 }
238
239 for (i = 0; dev_list[i];) {
240 cma_dev = &cma_dev_array[i];
241
242 cma_dev->guid = ibv_get_device_guid(dev_list[i]);
243 cma_dev->verbs = ibv_open_device(dev_list[i]);
244 if (!cma_dev->verbs) {
245 printf("CMA: unable to open RDMA device\n");
246 ret = ERR(ENODEV);
247 goto err3;
248 }
249
250 i++;
251 ret = ibv_query_device(cma_dev->verbs, &attr);
252 if (ret) {
253 printf("CMA: unable to query RDMA device\n");
254 goto err3;
255 }
256
257 cma_dev->port_cnt = attr.phys_port_cnt;
258 cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
259 cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
260 }
261
262 cma_dev_cnt = dev_cnt;
263 pthread_mutex_unlock(&mut);
264 ibv_free_device_list(dev_list);
265 return 0;
266
267 err3:
268 while (i--)
269 ibv_close_device(cma_dev_array[i].verbs);
270 free(cma_dev_array);
271 err2:
272 ibv_free_device_list(dev_list);
273 err1:
274 pthread_mutex_unlock(&mut);
275 return ret;
276 }
277
rdma_get_devices(int * num_devices)278 struct ibv_context **rdma_get_devices(int *num_devices)
279 {
280 struct ibv_context **devs = NULL;
281 int i;
282
283 if (!cma_dev_cnt && ucma_init())
284 goto out;
285
286 devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
287 if (!devs)
288 goto out;
289
290 for (i = 0; i < cma_dev_cnt; i++)
291 devs[i] = cma_dev_array[i].verbs;
292 devs[i] = NULL;
293 out:
294 if (num_devices)
295 *num_devices = devs ? cma_dev_cnt : 0;
296 return devs;
297 }
298
/*
 * Release an array obtained from rdma_get_devices().  Only the array is
 * freed; the contexts it points to are left untouched.
 */
void rdma_free_devices(struct ibv_context **list)
{
	free(list);
}
303
/* Library destructor: tears down the device table via ucma_cleanup(). */
static void __attribute__((destructor)) rdma_cma_fini(void)
{
	ucma_cleanup();
}
308
rdma_create_event_channel(void)309 struct rdma_event_channel *rdma_create_event_channel(void)
310 {
311 struct rdma_event_channel *channel;
312
313 if (!cma_dev_cnt && ucma_init())
314 return NULL;
315
316 channel = malloc(sizeof *channel);
317 if (!channel)
318 return NULL;
319
320 channel->fd = open("/dev/rdma_cm", O_RDWR);
321 if (channel->fd < 0) {
322 printf("CMA: unable to open /dev/rdma_cm\n");
323 goto err;
324 }
325 return channel;
326 err:
327 free(channel);
328 return NULL;
329 }
330
rdma_destroy_event_channel(struct rdma_event_channel * channel)331 void rdma_destroy_event_channel(struct rdma_event_channel *channel)
332 {
333 close(channel->fd);
334 free(channel);
335 }
336
ucma_get_device(struct cma_id_private * id_priv,uint64_t guid)337 static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
338 {
339 struct cma_device *cma_dev;
340 int i;
341
342 for (i = 0; i < cma_dev_cnt; i++) {
343 cma_dev = &cma_dev_array[i];
344 if (cma_dev->guid == guid) {
345 id_priv->cma_dev = cma_dev;
346 id_priv->id.verbs = cma_dev->verbs;
347 return 0;
348 }
349 }
350
351 return ERR(ENODEV);
352 }
353
ucma_free_id(struct cma_id_private * id_priv)354 static void ucma_free_id(struct cma_id_private *id_priv)
355 {
356 pthread_cond_destroy(&id_priv->cond);
357 pthread_mutex_destroy(&id_priv->mut);
358 if (id_priv->id.route.path_rec)
359 free(id_priv->id.route.path_rec);
360 free(id_priv);
361 }
362
ucma_alloc_id(struct rdma_event_channel * channel,void * context,enum rdma_port_space ps)363 static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel,
364 void *context,
365 enum rdma_port_space ps)
366 {
367 struct cma_id_private *id_priv;
368
369 id_priv = malloc(sizeof *id_priv);
370 if (!id_priv)
371 return NULL;
372
373 memset(id_priv, 0, sizeof *id_priv);
374 id_priv->id.context = context;
375 id_priv->id.ps = ps;
376 id_priv->id.channel = channel;
377 pthread_mutex_init(&id_priv->mut, NULL);
378 if (pthread_cond_init(&id_priv->cond, NULL))
379 goto err;
380
381 return id_priv;
382
383 err: ucma_free_id(id_priv);
384 return NULL;
385 }
386
rdma_create_id(struct rdma_event_channel * channel,struct rdma_cm_id ** id,void * context,enum rdma_port_space ps)387 int rdma_create_id(struct rdma_event_channel *channel,
388 struct rdma_cm_id **id, void *context,
389 enum rdma_port_space ps)
390 {
391 struct ucma_abi_create_id_resp *resp;
392 struct ucma_abi_create_id *cmd;
393 struct cma_id_private *id_priv;
394 void *msg;
395 int ret, size;
396
397 ret = cma_dev_cnt ? 0 : ucma_init();
398 if (ret)
399 return ret;
400
401 id_priv = ucma_alloc_id(channel, context, ps);
402 if (!id_priv)
403 return ERR(ENOMEM);
404
405 CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_CREATE_ID, size);
406 cmd->uid = (uintptr_t) id_priv;
407 cmd->ps = ps;
408
409 ret = write(channel->fd, msg, size);
410 if (ret != size)
411 goto err;
412
413 VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
414
415 id_priv->handle = resp->id;
416 *id = &id_priv->id;
417 return 0;
418
419 err: ucma_free_id(id_priv);
420 return ret;
421 }
422
/*
 * Issue UCMA_CMD_DESTROY_ID for @handle on @fd.
 *
 * Returns the events_reported value from the kernel's response, or -1 if
 * the write fails (errno from write()) or is short (errno ECONNREFUSED).
 */
static int ucma_destroy_kern_id(int fd, uint32_t handle)
{
	struct ucma_abi_destroy_id_resp *resp;
	struct ucma_abi_destroy_id *cmd;
	void *buf;
	int written, len;

	CMA_CREATE_MSG_CMD_RESP(buf, cmd, resp, UCMA_CMD_DESTROY_ID, len);
	cmd->id = handle;

	written = write(fd, buf, len);
	if (written == len) {
		VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
		return resp->events_reported;
	}

	if (written < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
441
rdma_destroy_id(struct rdma_cm_id * id)442 int rdma_destroy_id(struct rdma_cm_id *id)
443 {
444 struct cma_id_private *id_priv;
445 int ret;
446
447 id_priv = container_of(id, struct cma_id_private, id);
448 ret = ucma_destroy_kern_id(id->channel->fd, id_priv->handle);
449 if (ret < 0)
450 return ret;
451
452 pthread_mutex_lock(&id_priv->mut);
453 while (id_priv->events_completed < ret)
454 pthread_cond_wait(&id_priv->cond, &id_priv->mut);
455 pthread_mutex_unlock(&id_priv->mut);
456
457 ucma_free_id(id_priv);
458 return 0;
459 }
460
/*
 * Return the size of the concrete socket address behind @addr:
 * sizeof(struct sockaddr_in) for PF_INET, sizeof(struct sockaddr_in6)
 * for PF_INET6, and 0 for a NULL pointer or any other family.
 */
static int ucma_addrlen(struct sockaddr *addr)
{
	if (addr) {
		if (addr->sa_family == PF_INET)
			return sizeof(struct sockaddr_in);
		if (addr->sa_family == PF_INET6)
			return sizeof(struct sockaddr_in6);
	}

	return 0;
}
475
/*
 * Issue UCMA_CMD_QUERY_ROUTE for @id and copy the kernel's answer into
 * the public id: path records (when any are reported), source/destination
 * GIDs and pkey taken from the first route entry, and the source and
 * destination socket addresses.  If the id has no device yet and the
 * response carries a node GUID, the matching device is attached and the
 * port number recorded.
 *
 * Returns 0 on success, or -1 with errno set when the write fails or is
 * short (ECONNREFUSED), the path array cannot be allocated (ENOMEM), or
 * no device matches the GUID (ENODEV).
 */
static int ucma_query_route(struct rdma_cm_id *id)
{
	struct ucma_abi_query_route_resp *resp;
	struct ucma_abi_query_route *cmd;
	struct cma_id_private *id_priv;
	void *buf;
	int ret, len, path;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD_RESP(buf, cmd, resp, UCMA_CMD_QUERY_ROUTE, len);
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, buf, len);
	if (ret != len) {
		if (ret < 0)
			return -1;
		return ERR(ECONNREFUSED);
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	if (resp->num_paths) {
		id->route.path_rec = malloc(sizeof *id->route.path_rec *
					    resp->num_paths);
		if (!id->route.path_rec)
			return ERR(ENOMEM);

		id->route.num_paths = resp->num_paths;
		for (path = 0; path < resp->num_paths; path++)
			ibv_copy_path_rec_from_kern(&id->route.path_rec[path],
						    &resp->ib_route[path]);
	}

	/* Addressing information always comes from the first route entry. */
	memcpy(id->route.addr.addr.ibaddr.sgid.raw, resp->ib_route[0].sgid,
	       sizeof id->route.addr.addr.ibaddr.sgid);
	memcpy(id->route.addr.addr.ibaddr.dgid.raw, resp->ib_route[0].dgid,
	       sizeof id->route.addr.addr.ibaddr.dgid);
	id->route.addr.addr.ibaddr.pkey = resp->ib_route[0].pkey;
	memcpy(&id->route.addr.src_addr, &resp->src_addr,
	       sizeof resp->src_addr);
	memcpy(&id->route.addr.dst_addr, &resp->dst_addr,
	       sizeof resp->dst_addr);

	/* Nothing more to do when a device is already attached or unknown. */
	if (id_priv->cma_dev || !resp->node_guid)
		return 0;

	ret = ucma_get_device(id_priv, resp->node_guid);
	if (ret)
		return ret;

	id_priv->id.port_num = resp->port_num;
	return 0;
}
525
rdma_bind_addr(struct rdma_cm_id * id,struct sockaddr * addr)526 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
527 {
528 struct ucma_abi_bind_addr *cmd;
529 struct cma_id_private *id_priv;
530 void *msg;
531 int ret, size, addrlen;
532
533 addrlen = ucma_addrlen(addr);
534 if (!addrlen)
535 return ERR(EINVAL);
536
537 CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_BIND_ADDR, size);
538 id_priv = container_of(id, struct cma_id_private, id);
539 cmd->id = id_priv->handle;
540 memcpy(&cmd->addr, addr, addrlen);
541
542 ret = write(id->channel->fd, msg, size);
543 if (ret != size)
544 return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
545
546 return ucma_query_route(id);
547 }
548
rdma_resolve_addr(struct rdma_cm_id * id,struct sockaddr * src_addr,struct sockaddr * dst_addr,int timeout_ms)549 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
550 struct sockaddr *dst_addr, int timeout_ms)
551 {
552 struct ucma_abi_resolve_addr *cmd;
553 struct cma_id_private *id_priv;
554 void *msg;
555 int ret, size, daddrlen;
556
557 daddrlen = ucma_addrlen(dst_addr);
558 if (!daddrlen)
559 return ERR(EINVAL);
560
561 CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_RESOLVE_ADDR, size);
562 id_priv = container_of(id, struct cma_id_private, id);
563 cmd->id = id_priv->handle;
564 if (src_addr)
565 memcpy(&cmd->src_addr, src_addr, ucma_addrlen(src_addr));
566 memcpy(&cmd->dst_addr, dst_addr, daddrlen);
567 cmd->timeout_ms = timeout_ms;
568
569 ret = write(id->channel->fd, msg, size);
570 if (ret != size)
571 return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
572
573 memcpy(&id->route.addr.dst_addr, dst_addr, daddrlen);
574 return 0;
575 }
576
/*
 * Ask the kernel to resolve a route for @id (UCMA_CMD_RESOLVE_ROUTE)
 * with the given timeout in milliseconds.
 *
 * Returns 0 on success, or -1 with errno set by write() or to
 * ECONNREFUSED for a short write.
 */
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
	struct ucma_abi_resolve_route *cmd;
	struct cma_id_private *id_priv;
	void *buf;
	int written, len;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_RESOLVE_ROUTE, len);
	cmd->id = id_priv->handle;
	cmd->timeout_ms = timeout_ms;

	written = write(id->channel->fd, buf, len);
	if (written == len)
		return 0;

	if (written < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
595
ucma_is_ud_ps(enum rdma_port_space ps)596 static int ucma_is_ud_ps(enum rdma_port_space ps)
597 {
598 return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
599 }
600
/*
 * Ask the kernel (UCMA_CMD_INIT_QP_ATTR) for the QP attributes needed to
 * move @id's QP to the state given in qp_attr->qp_state.  The attributes
 * are written back into @qp_attr and the matching attribute mask into
 * @qp_attr_mask.
 *
 * Returns 0 on success, or -1 with errno set by write() or to
 * ECONNREFUSED for a short write.
 */
static int rdma_init_qp_attr(struct rdma_cm_id *id, struct ibv_qp_attr *qp_attr,
			     int *qp_attr_mask)
{
	struct ucma_abi_init_qp_attr *cmd;
	struct ibv_kern_qp_attr *resp;
	struct cma_id_private *id_priv;
	void *buf;
	int written, len;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD_RESP(buf, cmd, resp, UCMA_CMD_INIT_QP_ATTR, len);
	cmd->id = id_priv->handle;
	cmd->qp_state = qp_attr->qp_state;

	written = write(id->channel->fd, buf, len);
	if (written != len) {
		if (written < 0)
			return -1;
		return ERR(ECONNREFUSED);
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	ibv_copy_qp_attr_from_kern(qp_attr, resp);
	*qp_attr_mask = resp->qp_attr_mask;
	return 0;
}
625
ucma_modify_qp_rtr(struct rdma_cm_id * id,struct rdma_conn_param * conn_param)626 static int ucma_modify_qp_rtr(struct rdma_cm_id *id,
627 struct rdma_conn_param *conn_param)
628 {
629 struct ibv_qp_attr qp_attr;
630 int qp_attr_mask, ret;
631
632 if (!id->qp)
633 return ERR(EINVAL);
634
635 /* Need to update QP attributes from default values. */
636 qp_attr.qp_state = IBV_QPS_INIT;
637 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
638 if (ret)
639 return ret;
640
641 ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
642 if (ret)
643 return ret;
644
645 qp_attr.qp_state = IBV_QPS_RTR;
646 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
647 if (ret)
648 return ret;
649
650 if (conn_param)
651 qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
652 return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
653 }
654
ucma_modify_qp_rts(struct rdma_cm_id * id)655 static int ucma_modify_qp_rts(struct rdma_cm_id *id)
656 {
657 struct ibv_qp_attr qp_attr;
658 int qp_attr_mask, ret;
659
660 qp_attr.qp_state = IBV_QPS_RTS;
661 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
662 if (ret)
663 return ret;
664
665 return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
666 }
667
ucma_modify_qp_sqd(struct rdma_cm_id * id)668 static int ucma_modify_qp_sqd(struct rdma_cm_id *id)
669 {
670 struct ibv_qp_attr qp_attr;
671
672 if (!id->qp)
673 return 0;
674
675 qp_attr.qp_state = IBV_QPS_SQD;
676 return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
677 }
678
ucma_modify_qp_err(struct rdma_cm_id * id)679 static int ucma_modify_qp_err(struct rdma_cm_id *id)
680 {
681 struct ibv_qp_attr qp_attr;
682
683 if (!id->qp)
684 return 0;
685
686 qp_attr.qp_state = IBV_QPS_ERR;
687 return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
688 }
689
ucma_find_pkey(struct cma_device * cma_dev,uint8_t port_num,uint16_t pkey,uint16_t * pkey_index)690 static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num,
691 uint16_t pkey, uint16_t *pkey_index)
692 {
693 int ret, i;
694 uint16_t chk_pkey;
695
696 for (i = 0, ret = 0; !ret; i++) {
697 ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey);
698 if (!ret && pkey == chk_pkey) {
699 *pkey_index = (uint16_t) i;
700 return 0;
701 }
702 }
703 return ERR(EINVAL);
704 }
705
ucma_init_conn_qp3(struct cma_id_private * id_priv,struct ibv_qp * qp)706 static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
707 {
708 struct ibv_qp_attr qp_attr;
709 int ret;
710
711 ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
712 id_priv->id.route.addr.addr.ibaddr.pkey,
713 &qp_attr.pkey_index);
714 if (ret)
715 return ret;
716
717 qp_attr.port_num = id_priv->id.port_num;
718 qp_attr.qp_state = IBV_QPS_INIT;
719 qp_attr.qp_access_flags = 0;
720
721 return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS |
722 IBV_QP_PKEY_INDEX | IBV_QP_PORT);
723 }
724
ucma_init_conn_qp(struct cma_id_private * id_priv,struct ibv_qp * qp)725 static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
726 {
727 struct ibv_qp_attr qp_attr;
728 int qp_attr_mask, ret;
729
730 if (abi_ver == 3)
731 return ucma_init_conn_qp3(id_priv, qp);
732
733 qp_attr.qp_state = IBV_QPS_INIT;
734 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
735 if (ret)
736 return ret;
737
738 return ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
739 }
740
ucma_init_ud_qp3(struct cma_id_private * id_priv,struct ibv_qp * qp)741 static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
742 {
743 struct ibv_qp_attr qp_attr;
744 int ret;
745
746 ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
747 id_priv->id.route.addr.addr.ibaddr.pkey,
748 &qp_attr.pkey_index);
749 if (ret)
750 return ret;
751
752 qp_attr.port_num = id_priv->id.port_num;
753 qp_attr.qp_state = IBV_QPS_INIT;
754 qp_attr.qkey = RDMA_UDP_QKEY;
755
756 ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY |
757 IBV_QP_PKEY_INDEX | IBV_QP_PORT);
758 if (ret)
759 return ret;
760
761 qp_attr.qp_state = IBV_QPS_RTR;
762 ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
763 if (ret)
764 return ret;
765
766 qp_attr.qp_state = IBV_QPS_RTS;
767 qp_attr.sq_psn = 0;
768 return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
769 }
770
ucma_init_ud_qp(struct cma_id_private * id_priv,struct ibv_qp * qp)771 static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
772 {
773 struct ibv_qp_attr qp_attr;
774 int qp_attr_mask, ret;
775
776 if (abi_ver == 3)
777 return ucma_init_ud_qp3(id_priv, qp);
778
779 qp_attr.qp_state = IBV_QPS_INIT;
780 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
781 if (ret)
782 return ret;
783
784 ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
785 if (ret)
786 return ret;
787
788 qp_attr.qp_state = IBV_QPS_RTR;
789 ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
790 if (ret)
791 return ret;
792
793 qp_attr.qp_state = IBV_QPS_RTS;
794 qp_attr.sq_psn = 0;
795 return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
796 }
797
rdma_create_qp(struct rdma_cm_id * id,struct ibv_pd * pd,struct ibv_qp_init_attr * qp_init_attr)798 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
799 struct ibv_qp_init_attr *qp_init_attr)
800 {
801 struct cma_id_private *id_priv;
802 struct ibv_qp *qp;
803 int ret;
804
805 id_priv = container_of(id, struct cma_id_private, id);
806 if (id->verbs != pd->context)
807 return ERR(EINVAL);
808
809 qp = ibv_create_qp(pd, qp_init_attr);
810 if (!qp)
811 return ERR(ENOMEM);
812
813 if (ucma_is_ud_ps(id->ps))
814 ret = ucma_init_ud_qp(id_priv, qp);
815 else
816 ret = ucma_init_conn_qp(id_priv, qp);
817 if (ret)
818 goto err;
819
820 id->qp = qp;
821 return 0;
822 err:
823 ibv_destroy_qp(qp);
824 return ret;
825 }
826
rdma_destroy_qp(struct rdma_cm_id * id)827 void rdma_destroy_qp(struct rdma_cm_id *id)
828 {
829 ibv_destroy_qp(id->qp);
830 }
831
ucma_valid_param(struct cma_id_private * id_priv,struct rdma_conn_param * conn_param)832 static int ucma_valid_param(struct cma_id_private *id_priv,
833 struct rdma_conn_param *conn_param)
834 {
835 if (id_priv->id.ps != RDMA_PS_TCP)
836 return 0;
837
838 if ((conn_param->responder_resources >
839 id_priv->cma_dev->max_responder_resources) ||
840 (conn_param->initiator_depth >
841 id_priv->cma_dev->max_initiator_depth))
842 return ERR(EINVAL);
843
844 return 0;
845 }
846
ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param * dst,struct rdma_conn_param * src,uint32_t qp_num,uint8_t srq)847 static void ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param *dst,
848 struct rdma_conn_param *src,
849 uint32_t qp_num, uint8_t srq)
850 {
851 dst->qp_num = qp_num;
852 dst->srq = srq;
853 dst->responder_resources = src->responder_resources;
854 dst->initiator_depth = src->initiator_depth;
855 dst->flow_control = src->flow_control;
856 dst->retry_count = src->retry_count;
857 dst->rnr_retry_count = src->rnr_retry_count;
858 dst->valid = 1;
859
860 if (src->private_data && src->private_data_len) {
861 memcpy(dst->private_data, src->private_data,
862 src->private_data_len);
863 dst->private_data_len = src->private_data_len;
864 }
865 }
866
/*
 * Start a connection on @id (UCMA_CMD_CONNECT) with @conn_param.  The QP
 * number and SRQ flag sent to the kernel come from id->qp when the id
 * has a QP, and from @conn_param otherwise.
 *
 * Returns 0 on success, the parameter validation error, or -1 with errno
 * set by write() or to ECONNREFUSED for a short write.
 */
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_connect *cmd;
	struct cma_id_private *id_priv;
	uint32_t qp_num;
	uint8_t srq;
	void *buf;
	int ret, len;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_CONNECT, len);
	cmd->id = id_priv->handle;

	if (id->qp) {
		qp_num = id->qp->qp_num;
		srq = (id->qp->srq != NULL);
	} else {
		qp_num = conn_param->qp_num;
		srq = conn_param->srq;
	}
	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
				     qp_num, srq);

	ret = write(id->channel->fd, buf, len);
	if (ret == len)
		return 0;

	if (ret < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
896
/*
 * Put @id into the listening state (UCMA_CMD_LISTEN) with the given
 * backlog, then refresh the id's route information.
 *
 * Returns the result of ucma_query_route() on success, or -1 with errno
 * set by write() or to ECONNREFUSED for a short write.
 */
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
	struct ucma_abi_listen *cmd;
	struct cma_id_private *id_priv;
	void *buf;
	int written, len;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_LISTEN, len);
	cmd->id = id_priv->handle;
	cmd->backlog = backlog;

	written = write(id->channel->fd, buf, len);
	if (written == len)
		return ucma_query_route(id);

	if (written < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
915
/*
 * Accept a connection request on @id (UCMA_CMD_ACCEPT).  For connected
 * port spaces the QP is first moved to RTR.  The QP number and SRQ flag
 * sent to the kernel come from id->qp when the id has a QP, and from
 * @conn_param otherwise.  If the command cannot be written, the QP (if
 * any) is moved to the error state.
 *
 * Returns 0 on success, the validation or QP transition error, or -1
 * with errno set by write() or to ECONNREFUSED for a short write.
 */
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_accept *cmd;
	struct cma_id_private *id_priv;
	uint32_t qp_num;
	uint8_t srq;
	void *buf;
	int ret, len;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	if (!ucma_is_ud_ps(id->ps)) {
		ret = ucma_modify_qp_rtr(id, conn_param);
		if (ret)
			return ret;
	}

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_ACCEPT, len);
	cmd->id = id_priv->handle;
	cmd->uid = (uintptr_t) id_priv;

	if (id->qp) {
		qp_num = id->qp->qp_num;
		srq = (id->qp->srq != NULL);
	} else {
		qp_num = conn_param->qp_num;
		srq = conn_param->srq;
	}
	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
				     qp_num, srq);

	ret = write(id->channel->fd, buf, len);
	if (ret == len)
		return 0;

	ucma_modify_qp_err(id);
	if (ret < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
954
/*
 * Reject a connection request on @id (UCMA_CMD_REJECT), optionally
 * passing @private_data_len bytes of @private_data to the peer.  Private
 * data is sent only when both the buffer and the length are non-zero.
 *
 * Returns 0 on success, or -1 with errno set by write() or to
 * ECONNREFUSED for a short write.
 */
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
		uint8_t private_data_len)
{
	struct ucma_abi_reject *cmd;
	struct cma_id_private *id_priv;
	void *buf;
	int written, len;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_REJECT, len);
	cmd->id = id_priv->handle;

	if (!private_data || !private_data_len) {
		cmd->private_data_len = 0;
	} else {
		memcpy(cmd->private_data, private_data, private_data_len);
		cmd->private_data_len = private_data_len;
	}

	written = write(id->channel->fd, buf, len);
	if (written == len)
		return 0;

	if (written < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
979
/*
 * Forward the verbs event @event for @id to the kernel (UCMA_CMD_NOTIFY).
 *
 * Returns 0 on success, or -1 with errno set by write() or to
 * ECONNREFUSED for a short write.
 */
int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event)
{
	struct ucma_abi_notify *cmd;
	struct cma_id_private *id_priv;
	void *buf;
	int written, len;

	id_priv = container_of(id, struct cma_id_private, id);

	CMA_CREATE_MSG_CMD(buf, cmd, UCMA_CMD_NOTIFY, len);
	cmd->id = id_priv->handle;
	cmd->event = event;

	written = write(id->channel->fd, buf, len);
	if (written == len)
		return 0;

	if (written < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
998
rdma_disconnect(struct rdma_cm_id * id)999 int rdma_disconnect(struct rdma_cm_id *id)
1000 {
1001 struct ucma_abi_disconnect *cmd;
1002 struct cma_id_private *id_priv;
1003 void *msg;
1004 int ret, size;
1005
1006 switch (id->verbs->device->transport_type) {
1007 case IBV_TRANSPORT_IB:
1008 ret = ucma_modify_qp_err(id);
1009 break;
1010 case IBV_TRANSPORT_IWARP:
1011 ret = ucma_modify_qp_sqd(id);
1012 break;
1013 default:
1014 ret = ERR(EINVAL);
1015 }
1016 if (ret)
1017 return ret;
1018
1019 CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_DISCONNECT, size);
1020 id_priv = container_of(id, struct cma_id_private, id);
1021 cmd->id = id_priv->handle;
1022
1023 ret = write(id->channel->fd, msg, size);
1024 if (ret != size)
1025 return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1026
1027 return 0;
1028 }
1029
rdma_join_multicast(struct rdma_cm_id * id,struct sockaddr * addr,void * context)1030 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
1031 void *context)
1032 {
1033 struct ucma_abi_join_mcast *cmd;
1034 struct ucma_abi_create_id_resp *resp;
1035 struct cma_id_private *id_priv;
1036 struct cma_multicast *mc, **pos;
1037 void *msg;
1038 int ret, size, addrlen;
1039
1040 id_priv = container_of(id, struct cma_id_private, id);
1041 addrlen = ucma_addrlen(addr);
1042 if (!addrlen)
1043 return ERR(EINVAL);
1044
1045 mc = malloc(sizeof *mc);
1046 if (!mc)
1047 return ERR(ENOMEM);
1048
1049 memset(mc, 0, sizeof *mc);
1050 mc->context = context;
1051 mc->id_priv = id_priv;
1052 memcpy(&mc->addr, addr, addrlen);
1053 if (pthread_cond_init(&mc->cond, NULL)) {
1054 ret = -1;
1055 goto err1;
1056 }
1057
1058 pthread_mutex_lock(&id_priv->mut);
1059 mc->next = id_priv->mc_list;
1060 id_priv->mc_list = mc;
1061 pthread_mutex_unlock(&id_priv->mut);
1062
1063 CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_JOIN_MCAST, size);
1064 cmd->id = id_priv->handle;
1065 memcpy(&cmd->addr, addr, addrlen);
1066 cmd->uid = (uintptr_t) mc;
1067
1068 ret = write(id->channel->fd, msg, size);
1069 if (ret != size) {
1070 ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1071 goto err2;
1072 }
1073
1074 VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
1075
1076 mc->handle = resp->id;
1077 return 0;
1078 err2:
1079 pthread_mutex_lock(&id_priv->mut);
1080 for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next)
1081 ;
1082 *pos = mc->next;
1083 pthread_mutex_unlock(&id_priv->mut);
1084 err1:
1085 free(mc);
1086 return ret;
1087 }
1088
rdma_leave_multicast(struct rdma_cm_id * id,struct sockaddr * addr)1089 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
1090 {
1091 struct ucma_abi_destroy_id *cmd;
1092 struct ucma_abi_destroy_id_resp *resp;
1093 struct cma_id_private *id_priv;
1094 struct cma_multicast *mc, **pos;
1095 void *msg;
1096 int ret, size, addrlen;
1097
1098 addrlen = ucma_addrlen(addr);
1099 if (!addrlen)
1100 return ERR(EINVAL);
1101
1102 id_priv = container_of(id, struct cma_id_private, id);
1103 pthread_mutex_lock(&id_priv->mut);
1104 for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next)
1105 if (!memcmp(&(*pos)->addr, addr, addrlen))
1106 break;
1107
1108 mc = *pos;
1109 if (*pos)
1110 *pos = mc->next;
1111 pthread_mutex_unlock(&id_priv->mut);
1112 if (!mc)
1113 return ERR(EADDRNOTAVAIL);
1114
1115 if (id->qp)
1116 ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid);
1117
1118 CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_LEAVE_MCAST, size);
1119 cmd->id = mc->handle;
1120
1121 ret = write(id->channel->fd, msg, size);
1122 if (ret != size) {
1123 ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1124 goto free;
1125 }
1126
1127 VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
1128
1129 pthread_mutex_lock(&id_priv->mut);
1130 while (mc->events_completed < resp->events_reported)
1131 pthread_cond_wait(&mc->cond, &id_priv->mut);
1132 pthread_mutex_unlock(&id_priv->mut);
1133
1134 ret = 0;
1135 free:
1136 free(mc);
1137 return ret;
1138 }
1139
ucma_complete_event(struct cma_id_private * id_priv)1140 static void ucma_complete_event(struct cma_id_private *id_priv)
1141 {
1142 pthread_mutex_lock(&id_priv->mut);
1143 id_priv->events_completed++;
1144 pthread_cond_signal(&id_priv->cond);
1145 pthread_mutex_unlock(&id_priv->mut);
1146 }
1147
ucma_complete_mc_event(struct cma_multicast * mc)1148 static void ucma_complete_mc_event(struct cma_multicast *mc)
1149 {
1150 pthread_mutex_lock(&mc->id_priv->mut);
1151 mc->events_completed++;
1152 pthread_cond_signal(&mc->cond);
1153 mc->id_priv->events_completed++;
1154 pthread_cond_signal(&mc->id_priv->cond);
1155 pthread_mutex_unlock(&mc->id_priv->mut);
1156 }
1157
rdma_ack_cm_event(struct rdma_cm_event * event)1158 int rdma_ack_cm_event(struct rdma_cm_event *event)
1159 {
1160 struct cma_event *evt;
1161
1162 if (!event)
1163 return ERR(EINVAL);
1164
1165 evt = container_of(event, struct cma_event, event);
1166
1167 if (evt->mc)
1168 ucma_complete_mc_event(evt->mc);
1169 else
1170 ucma_complete_event(evt->id_priv);
1171 free(evt);
1172 return 0;
1173 }
1174
ucma_process_conn_req(struct cma_event * evt,uint32_t handle)1175 static int ucma_process_conn_req(struct cma_event *evt,
1176 uint32_t handle)
1177 {
1178 struct cma_id_private *id_priv;
1179 int ret;
1180
1181 id_priv = ucma_alloc_id(evt->id_priv->id.channel,
1182 evt->id_priv->id.context, evt->id_priv->id.ps);
1183 if (!id_priv) {
1184 ucma_destroy_kern_id(evt->id_priv->id.channel->fd, handle);
1185 ret = ERR(ENOMEM);
1186 goto err;
1187 }
1188
1189 evt->event.listen_id = &evt->id_priv->id;
1190 evt->event.id = &id_priv->id;
1191 id_priv->handle = handle;
1192
1193 ret = ucma_query_route(&id_priv->id);
1194 if (ret) {
1195 rdma_destroy_id(&id_priv->id);
1196 goto err;
1197 }
1198
1199 return 0;
1200 err:
1201 ucma_complete_event(evt->id_priv);
1202 return ret;
1203 }
1204
/*
 * Handle a connect response for 'id_priv': call ucma_modify_qp_rtr() and
 * ucma_modify_qp_rts() on the id, then write a UCMA_CMD_ACCEPT command to
 * the id's event channel.
 *
 * Returns 0 on success.  If any step fails, ucma_modify_qp_err() is called
 * on the id and the failure is returned: the helper's return value,
 * ERR(ECONNREFUSED) for a short write, or -1 when write() fails.
 */
static int ucma_process_conn_resp(struct cma_id_private *id_priv)
{
	struct ucma_abi_accept *cmd;
	void *msg;
	int status, size;

	status = ucma_modify_qp_rtr(&id_priv->id, NULL);
	if (!status)
		status = ucma_modify_qp_rts(&id_priv->id);
	if (status)
		goto fail;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
	cmd->id = id_priv->handle;

	status = write(id_priv->id.channel->fd, msg, size);
	if (status == size)
		return 0;

	status = (status >= 0) ? ERR(ECONNREFUSED) : -1;
fail:
	ucma_modify_qp_err(&id_priv->id);
	return status;
}
1233
/*
 * Call ucma_modify_qp_rts() on 'id'; if that fails, call
 * ucma_modify_qp_err() on it.  Returns the ucma_modify_qp_rts() result.
 */
static int ucma_process_establish(struct rdma_cm_id *id)
{
	int status = ucma_modify_qp_rts(id);

	if (status != 0)
		ucma_modify_qp_err(id);

	return status;
}
1244
ucma_process_join(struct cma_event * evt)1245 static int ucma_process_join(struct cma_event *evt)
1246 {
1247 evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid;
1248 evt->mc->mlid = evt->event.param.ud.ah_attr.dlid;
1249
1250 if (!evt->id_priv->id.qp)
1251 return 0;
1252
1253 return ibv_attach_mcast(evt->id_priv->id.qp, &evt->mc->mgid,
1254 evt->mc->mlid);
1255 }
1256
ucma_copy_conn_event(struct cma_event * event,struct ucma_abi_conn_param * src)1257 static void ucma_copy_conn_event(struct cma_event *event,
1258 struct ucma_abi_conn_param *src)
1259 {
1260 struct rdma_conn_param *dst = &event->event.param.conn;
1261
1262 dst->private_data_len = src->private_data_len;
1263 if (src->private_data_len) {
1264 dst->private_data = &event->private_data;
1265 memcpy(&event->private_data, src->private_data,
1266 src->private_data_len);
1267 }
1268
1269 dst->responder_resources = src->responder_resources;
1270 dst->initiator_depth = src->initiator_depth;
1271 dst->flow_control = src->flow_control;
1272 dst->retry_count = src->retry_count;
1273 dst->rnr_retry_count = src->rnr_retry_count;
1274 dst->srq = src->srq;
1275 dst->qp_num = src->qp_num;
1276 }
1277
ucma_copy_ud_event(struct cma_event * event,struct ucma_abi_ud_param * src)1278 static void ucma_copy_ud_event(struct cma_event *event,
1279 struct ucma_abi_ud_param *src)
1280 {
1281 struct rdma_ud_param *dst = &event->event.param.ud;
1282
1283 dst->private_data_len = src->private_data_len;
1284 if (src->private_data_len) {
1285 dst->private_data = &event->private_data;
1286 memcpy(&event->private_data, src->private_data,
1287 src->private_data_len);
1288 }
1289
1290 ibv_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr);
1291 dst->qp_num = src->qp_num;
1292 dst->qkey = src->qkey;
1293 }
1294
rdma_get_cm_event(struct rdma_event_channel * channel,struct rdma_cm_event ** event)1295 int rdma_get_cm_event(struct rdma_event_channel *channel,
1296 struct rdma_cm_event **event)
1297 {
1298 struct ucma_abi_event_resp *resp;
1299 struct ucma_abi_get_event *cmd;
1300 struct cma_event *evt;
1301 void *msg;
1302 int ret, size;
1303
1304 ret = cma_dev_cnt ? 0 : ucma_init();
1305 if (ret)
1306 return ret;
1307
1308 if (!event)
1309 return ERR(EINVAL);
1310
1311 evt = malloc(sizeof *evt);
1312 if (!evt)
1313 return ERR(ENOMEM);
1314
1315 retry:
1316 memset(evt, 0, sizeof *evt);
1317 CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_GET_EVENT, size);
1318 ret = write(channel->fd, msg, size);
1319 if (ret != size) {
1320 free(evt);
1321 return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1322 }
1323
1324 VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
1325
1326 evt->event.event = resp->event;
1327 evt->id_priv = (void *) (uintptr_t) resp->uid;
1328 evt->event.id = &evt->id_priv->id;
1329 evt->event.status = resp->status;
1330
1331 switch (resp->event) {
1332 case RDMA_CM_EVENT_ADDR_RESOLVED:
1333 evt->event.status = ucma_query_route(&evt->id_priv->id);
1334 if (evt->event.status)
1335 evt->event.event = RDMA_CM_EVENT_ADDR_ERROR;
1336 break;
1337 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1338 evt->event.status = ucma_query_route(&evt->id_priv->id);
1339 if (evt->event.status)
1340 evt->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1341 break;
1342 case RDMA_CM_EVENT_CONNECT_REQUEST:
1343 evt->id_priv = (void *) (uintptr_t) resp->uid;
1344 if (ucma_is_ud_ps(evt->id_priv->id.ps))
1345 ucma_copy_ud_event(evt, &resp->param.ud);
1346 else
1347 ucma_copy_conn_event(evt, &resp->param.conn);
1348
1349 ret = ucma_process_conn_req(evt, resp->id);
1350 if (ret)
1351 goto retry;
1352 break;
1353 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1354 ucma_copy_conn_event(evt, &resp->param.conn);
1355 evt->event.status = ucma_process_conn_resp(evt->id_priv);
1356 if (!evt->event.status)
1357 evt->event.event = RDMA_CM_EVENT_ESTABLISHED;
1358 else {
1359 evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1360 evt->id_priv->connect_error = 1;
1361 }
1362 break;
1363 case RDMA_CM_EVENT_ESTABLISHED:
1364 if (ucma_is_ud_ps(evt->id_priv->id.ps)) {
1365 ucma_copy_ud_event(evt, &resp->param.ud);
1366 break;
1367 }
1368
1369 ucma_copy_conn_event(evt, &resp->param.conn);
1370 evt->event.status = ucma_process_establish(&evt->id_priv->id);
1371 if (evt->event.status) {
1372 evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1373 evt->id_priv->connect_error = 1;
1374 }
1375 break;
1376 case RDMA_CM_EVENT_REJECTED:
1377 if (evt->id_priv->connect_error) {
1378 ucma_complete_event(evt->id_priv);
1379 goto retry;
1380 }
1381 ucma_copy_conn_event(evt, &resp->param.conn);
1382 ucma_modify_qp_err(evt->event.id);
1383 break;
1384 case RDMA_CM_EVENT_DISCONNECTED:
1385 if (evt->id_priv->connect_error) {
1386 ucma_complete_event(evt->id_priv);
1387 goto retry;
1388 }
1389 ucma_copy_conn_event(evt, &resp->param.conn);
1390 break;
1391 case RDMA_CM_EVENT_MULTICAST_JOIN:
1392 evt->mc = (void *) (uintptr_t) resp->uid;
1393 evt->id_priv = evt->mc->id_priv;
1394 evt->event.id = &evt->id_priv->id;
1395 ucma_copy_ud_event(evt, &resp->param.ud);
1396 evt->event.param.ud.private_data = evt->mc->context;
1397 evt->event.status = ucma_process_join(evt);
1398 if (evt->event.status)
1399 evt->event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
1400 break;
1401 case RDMA_CM_EVENT_MULTICAST_ERROR:
1402 evt->mc = (void *) (uintptr_t) resp->uid;
1403 evt->id_priv = evt->mc->id_priv;
1404 evt->event.id = &evt->id_priv->id;
1405 evt->event.param.ud.private_data = evt->mc->context;
1406 break;
1407 default:
1408 evt->id_priv = (void *) (uintptr_t) resp->uid;
1409 evt->event.id = &evt->id_priv->id;
1410 evt->event.status = resp->status;
1411 if (ucma_is_ud_ps(evt->id_priv->id.ps))
1412 ucma_copy_ud_event(evt, &resp->param.ud);
1413 else
1414 ucma_copy_conn_event(evt, &resp->param.conn);
1415 break;
1416 }
1417
1418 *event = &evt->event;
1419 return 0;
1420 }
1421
rdma_event_str(enum rdma_cm_event_type event)1422 const char *rdma_event_str(enum rdma_cm_event_type event)
1423 {
1424 switch (event) {
1425 case RDMA_CM_EVENT_ADDR_RESOLVED:
1426 return "RDMA_CM_EVENT_ADDR_RESOLVED";
1427 case RDMA_CM_EVENT_ADDR_ERROR:
1428 return "RDMA_CM_EVENT_ADDR_ERROR";
1429 case RDMA_CM_EVENT_ROUTE_RESOLVED:
1430 return "RDMA_CM_EVENT_ROUTE_RESOLVED";
1431 case RDMA_CM_EVENT_ROUTE_ERROR:
1432 return "RDMA_CM_EVENT_ROUTE_ERROR";
1433 case RDMA_CM_EVENT_CONNECT_REQUEST:
1434 return "RDMA_CM_EVENT_CONNECT_REQUEST";
1435 case RDMA_CM_EVENT_CONNECT_RESPONSE:
1436 return "RDMA_CM_EVENT_CONNECT_RESPONSE";
1437 case RDMA_CM_EVENT_CONNECT_ERROR:
1438 return "RDMA_CM_EVENT_CONNECT_ERROR";
1439 case RDMA_CM_EVENT_UNREACHABLE:
1440 return "RDMA_CM_EVENT_UNREACHABLE";
1441 case RDMA_CM_EVENT_REJECTED:
1442 return "RDMA_CM_EVENT_REJECTED";
1443 case RDMA_CM_EVENT_ESTABLISHED:
1444 return "RDMA_CM_EVENT_ESTABLISHED";
1445 case RDMA_CM_EVENT_DISCONNECTED:
1446 return "RDMA_CM_EVENT_DISCONNECTED";
1447 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1448 return "RDMA_CM_EVENT_DEVICE_REMOVAL";
1449 case RDMA_CM_EVENT_MULTICAST_JOIN:
1450 return "RDMA_CM_EVENT_MULTICAST_JOIN";
1451 case RDMA_CM_EVENT_MULTICAST_ERROR:
1452 return "RDMA_CM_EVENT_MULTICAST_ERROR";
1453 case RDMA_CM_EVENT_ADDR_CHANGE:
1454 return "RDMA_CM_EVENT_ADDR_CHANGE";
1455 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1456 return "RDMA_CM_EVENT_TIMEWAIT_EXIT";
1457 default:
1458 return "UNKNOWN EVENT";
1459 }
1460 }
1461
/*
 * Set an option on 'id' by writing a UCMA_CMD_SET_OPTION command to the
 * id's event channel.  The command carries the id handle, 'level',
 * 'optname', the address of 'optval' and 'optlen'.
 *
 * Returns 0 on success, ERR(ECONNREFUSED) on a short write, or -1 when
 * write() fails.
 */
int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
		    void *optval, size_t optlen)
{
	struct ucma_abi_set_option *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int written, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_SET_OPTION, size);
	id_priv = container_of(id, struct cma_id_private, id);

	cmd->id = id_priv->handle;
	cmd->level = level;
	cmd->optname = optname;
	cmd->optval = (uintptr_t) optval;
	cmd->optlen = optlen;

	written = write(id->channel->fd, msg, size);
	if (written == size)
		return 0;
	if (written < 0)
		return -1;

	return ERR(ECONNREFUSED);
}
1484
/*
 * Move 'id' to a different event channel.
 *
 * A UCMA_CMD_MIGRATE_ID command naming the id handle and the fd of the
 * id's current channel is written to the fd of the new 'channel'.  The id
 * is then pointed at the new channel and the call waits until the id's
 * completed-event count has reached the count reported in the response.
 *
 * Returns 0 on success, ERR(ECONNREFUSED) on a short write, or -1 when
 * write() fails.
 */
int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
{
	struct ucma_abi_migrate_resp *resp;
	struct ucma_abi_migrate_id *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int written, size;

	id_priv = container_of(id, struct cma_id_private, id);
	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_MIGRATE_ID, size);
	cmd->id = id_priv->handle;
	cmd->fd = id->channel->fd;

	written = write(channel->fd, msg, size);
	if (written != size) {
		if (written < 0)
			return -1;
		return ERR(ECONNREFUSED);
	}

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	/*
	 * Supporting migration while events are still being processed on the
	 * current channel would require blocking here until no event for this
	 * id remains outstanding on that channel, so that the user cannot
	 * handle events for this id on the old channel once this call has
	 * returned.
	 */
	pthread_mutex_lock(&id_priv->mut);
	id->channel = channel;
	while (id_priv->events_completed < resp->events_reported)
		pthread_cond_wait(&id_priv->cond, &id_priv->mut);
	pthread_mutex_unlock(&id_priv->mut);

	return 0;
}
1519