1 /*
2  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  * $Id: cm.c 3453 2005-09-15 21:43:21Z sean.hefty $
33  */
34 
35 #if HAVE_CONFIG_H
36 #  include <config.h>
37 #endif /* HAVE_CONFIG_H */
38 
39 #include <stdlib.h>
40 #include <string.h>
41 #include <glob.h>
42 #include <stdio.h>
43 #include <fcntl.h>
44 #include <errno.h>
45 #include <stdint.h>
46 #include <poll.h>
47 #include <unistd.h>
48 #include <pthread.h>
49 #include <infiniband/endian.h>
50 #include <infiniband/byteswap.h>
51 #include <stddef.h>
52 
53 #include <infiniband/driver.h>
54 #include <infiniband/marshall.h>
55 #include <rdma/rdma_cma.h>
56 #include <rdma/rdma_cma_abi.h>
57 
58 #ifdef INCLUDE_VALGRIND
59 #   include <valgrind/memcheck.h>
60 #   ifndef VALGRIND_MAKE_MEM_DEFINED
61 #       warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
62 #   endif
63 #endif
64 
65 #ifndef VALGRIND_MAKE_MEM_DEFINED
66 #   define VALGRIND_MAKE_MEM_DEFINED(addr,len)
67 #endif
68 
69 #define PFX "librdmacm: "
70 
/*
 * 64-bit host <-> network byte order conversion.  On little-endian hosts
 * the bytes are swapped; otherwise the value is passed through unchanged.
 */
static inline uint64_t htonll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}

static inline uint64_t ntohll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}
78 
/*
 * Store err in errno and yield -1, so that callers can write
 * "return ERR(EINVAL);" in a single statement.
 */
static inline int ERR(int err)
{
	return (errno = err, -1);
}
84 
/*
 * Build, on the caller's stack, a kernel command that expects a response.
 *
 * msg  - lvalue (declared void * by the callers in this file); receives
 *        an alloca()'d buffer holding a struct ucma_abi_cmd_hdr followed
 *        directly by the command payload.
 * cmd  - pointer lvalue for the payload.  It is pointed just past the
 *        header, zero-filled, and its 'response' member is set to the
 *        address of the response buffer.  The variable passed here must
 *        itself be called 'cmd': the 'hdr->cmd' member access in the body
 *        is also subject to parameter substitution.
 * resp - pointer lvalue; receives a separate alloca()'d buffer of
 *        sizeof(*resp) bytes, which is not initialised here.
 * type - value stored in the header's command field.
 * size - lvalue; receives sizeof(header) + sizeof(*cmd), i.e. the length
 *        of msg.
 *
 * The header's 'in' and 'out' fields are set to sizeof(*cmd) and
 * sizeof(*resp) respectively.
 *
 * The expansion contains "return ERR(ENOMEM);" statements, so the macro
 * can only be used inside functions that return int.  It also relies on
 * pointer arithmetic on msg ("msg + sizeof(*hdr)"), which for a void *
 * is a GNU C extension.
 */
#define CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;         \
                                            \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = sizeof(*resp);           \
	memset(cmd, 0, sizeof(*cmd));       \
	resp = alloca(sizeof(*resp));       \
	if (!resp)                          \
		return ERR(ENOMEM);         \
	cmd->response = (uintptr_t)resp;\
} while (0)
104 
/*
 * Build, on the caller's stack, a kernel command that has no response.
 *
 * msg  - lvalue (declared void * by the callers in this file); receives
 *        an alloca()'d buffer holding a struct ucma_abi_cmd_hdr followed
 *        directly by the command payload.
 * cmd  - pointer lvalue for the payload; pointed just past the header
 *        and zero-filled.  The variable passed here must itself be called
 *        'cmd': the 'hdr->cmd' member access in the body is also subject
 *        to parameter substitution.
 * type - value stored in the header's command field.
 * size - lvalue; receives sizeof(header) + sizeof(*cmd), i.e. the length
 *        of msg.
 *
 * The header's 'in' field is set to sizeof(*cmd) and 'out' to 0.
 *
 * The expansion contains "return ERR(ENOMEM);", so the macro can only be
 * used inside functions that return int.  It also relies on pointer
 * arithmetic on msg ("msg + sizeof(*hdr)"), which for a void * is a
 * GNU C extension.
 */
#define CMA_CREATE_MSG_CMD(msg, cmd, type, size) \
do {                                        \
	struct ucma_abi_cmd_hdr *hdr;       \
                                            \
	size = sizeof(*hdr) + sizeof(*cmd); \
	msg = alloca(size);                 \
	if (!msg)                           \
		return ERR(ENOMEM);         \
	hdr = msg;                          \
	cmd = msg + sizeof(*hdr);           \
	hdr->cmd = type;                    \
	hdr->in  = sizeof(*cmd);            \
	hdr->out = 0;                       \
	memset(cmd, 0, sizeof(*cmd));       \
} while (0)
120 
/*
 * Cached state for one RDMA device.  ucma_init() fills one entry per
 * device returned by ibv_get_device_list().
 */
struct cma_device {
	/* Context returned by ibv_open_device(); closed in ucma_cleanup(). */
	struct ibv_context *verbs;
	/* Result of ibv_get_device_guid(); the key used by ucma_get_device(). */
	uint64_t	    guid;
	/* attr.phys_port_cnt as reported by ibv_query_device(). */
	int		    port_cnt;
	/* attr.max_qp_init_rd_atom, narrowed to 8 bits. */
	uint8_t		    max_initiator_depth;
	/* attr.max_qp_rd_atom, narrowed to 8 bits. */
	uint8_t		    max_responder_resources;
};
128 
/*
 * Library-private wrapper around the public struct rdma_cm_id.  The
 * public API hands out &id; internal code recovers the wrapper with
 * container_of().
 */
struct cma_id_private {
	/* Public identifier returned to the application. */
	struct rdma_cm_id id;
	/* Device this id is associated with; set by ucma_get_device(). */
	struct cma_device *cma_dev;
	/*
	 * Compared in rdma_destroy_id() against the event count reported by
	 * the kernel; the code that increments it is outside this section.
	 */
	int		  events_completed;
	/* NOTE(review): not referenced in this part of the file. */
	int		  connect_error;
	/* Waited on (together with mut) in rdma_destroy_id(). */
	pthread_cond_t	  cond;
	/* Protects mc_list; also the mutex paired with cond. */
	pthread_mutex_t	  mut;
	/* Identifier returned by the kernel for UCMA_CMD_CREATE_ID. */
	uint32_t	  handle;
	/* Singly linked list of multicast joins made through this id. */
	struct cma_multicast *mc_list;
};
139 
/*
 * One multicast join, created by rdma_join_multicast() and linked onto
 * the owning id's mc_list.
 */
struct cma_multicast {
	/* Next entry on cma_id_private.mc_list. */
	struct cma_multicast  *next;
	/* Id through which the join was requested. */
	struct cma_id_private *id_priv;
	/* Caller-supplied context passed to rdma_join_multicast(). */
	void		*context;
	/* NOTE(review): updated outside this part of the file. */
	int		events_completed;
	/* Initialised in rdma_join_multicast(); waiters are outside this part. */
	pthread_cond_t	cond;
	/* Identifier returned by the kernel for UCMA_CMD_JOIN_MCAST. */
	uint32_t	handle;
	/* NOTE(review): mgid and mlid are not filled in within this part of the file. */
	union ibv_gid	mgid;
	uint16_t	mlid;
	/* Copy of the address given to rdma_join_multicast(). */
	struct sockaddr_storage addr;
};
151 
/*
 * Library-private wrapper around the public struct rdma_cm_event.
 * NOTE(review): the code that allocates and fills this structure is not
 * in this part of the file; the field notes below are inferred from the
 * declarations only and should be confirmed there.
 */
struct cma_event {
	/* Public event structure. */
	struct rdma_cm_event	event;
	/* Storage of RDMA_MAX_PRIVATE_DATA bytes; presumably backs the event's private data. */
	uint8_t			private_data[RDMA_MAX_PRIVATE_DATA];
	/* Id the event relates to. */
	struct cma_id_private	*id_priv;
	/* Multicast entry the event relates to, if any. */
	struct cma_multicast	*mc;
};
158 
/* Table of opened devices; allocated by ucma_init(), freed by ucma_cleanup(). */
static struct cma_device *cma_dev_array;
/* Number of valid entries in cma_dev_array; zero means "not yet initialised". */
static int cma_dev_cnt;
/* Serialises ucma_init(). */
static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
/* Kernel ABI version in use; replaced by check_abi_version() when sysfs provides one. */
static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;
163 
/*
 * Given a pointer to member 'field' of a 'type' object, return a pointer
 * to the enclosing object.
 *
 * 'ptr' is parenthesised so that arbitrary pointer expressions (ternaries,
 * pointer arithmetic, ...) are converted as a whole, and the subtraction is
 * done on a char * so that it does not rely on void * arithmetic.
 */
#define container_of(ptr, type, field) \
	((type *) ((char *)(ptr) - offsetof(type, field)))
166 
ucma_cleanup(void)167 static void ucma_cleanup(void)
168 {
169 	if (cma_dev_cnt) {
170 		while (cma_dev_cnt)
171 			ibv_close_device(cma_dev_array[--cma_dev_cnt].verbs);
172 
173 		free(cma_dev_array);
174 		cma_dev_cnt = 0;
175 	}
176 }
177 
check_abi_version(void)178 static int check_abi_version(void)
179 {
180 	char value[8];
181 
182 	if ((ibv_read_sysfs_file(ibv_get_sysfs_path(),
183 				 "class/misc/rdma_cm/abi_version",
184 				 value, sizeof value) < 0) &&
185 	    (ibv_read_sysfs_file(ibv_get_sysfs_path(),
186 				 "class/infiniband_ucma/abi_version",
187 				 value, sizeof value) < 0)) {
188 		/*
189 		 * Older version of Linux do not have class/misc.  To support
190 		 * backports, assume the most recent version of the ABI.  If
191 		 * we're wrong, we'll simply fail later when calling the ABI.
192 		 */
193 		fprintf(stderr, "librdmacm: couldn't read ABI version.\n");
194 		fprintf(stderr, "librdmacm: assuming: %d\n", abi_ver);
195 		return 0;
196 	}
197 
198 	abi_ver = strtol(value, NULL, 10);
199 	if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
200 	    abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
201 		fprintf(stderr, "librdmacm: kernel ABI version %d "
202 				"doesn't match library version %d.\n",
203 				abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
204 		return -1;
205 	}
206 	return 0;
207 }
208 
ucma_init(void)209 static int ucma_init(void)
210 {
211 	struct ibv_device **dev_list = NULL;
212 	struct cma_device *cma_dev;
213 	struct ibv_device_attr attr;
214 	int i, ret, dev_cnt;
215 
216 	pthread_mutex_lock(&mut);
217 	if (cma_dev_cnt) {
218 		pthread_mutex_unlock(&mut);
219 		return 0;
220 	}
221 
222 	ret = check_abi_version();
223 	if (ret)
224 		goto err1;
225 
226 	dev_list = ibv_get_device_list(&dev_cnt);
227 	if (!dev_list) {
228 		printf("CMA: unable to get RDMA device list\n");
229 		ret = ERR(ENODEV);
230 		goto err1;
231 	}
232 
233 	cma_dev_array = malloc(sizeof *cma_dev * dev_cnt);
234 	if (!cma_dev_array) {
235 		ret = ERR(ENOMEM);
236 		goto err2;
237 	}
238 
239 	for (i = 0; dev_list[i];) {
240 		cma_dev = &cma_dev_array[i];
241 
242 		cma_dev->guid = ibv_get_device_guid(dev_list[i]);
243 		cma_dev->verbs = ibv_open_device(dev_list[i]);
244 		if (!cma_dev->verbs) {
245 			printf("CMA: unable to open RDMA device\n");
246 			ret = ERR(ENODEV);
247 			goto err3;
248 		}
249 
250 		i++;
251 		ret = ibv_query_device(cma_dev->verbs, &attr);
252 		if (ret) {
253 			printf("CMA: unable to query RDMA device\n");
254 			goto err3;
255 		}
256 
257 		cma_dev->port_cnt = attr.phys_port_cnt;
258 		cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
259 		cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
260 	}
261 
262 	cma_dev_cnt = dev_cnt;
263 	pthread_mutex_unlock(&mut);
264 	ibv_free_device_list(dev_list);
265 	return 0;
266 
267 err3:
268 	while (i--)
269 		ibv_close_device(cma_dev_array[i].verbs);
270 	free(cma_dev_array);
271 err2:
272 	ibv_free_device_list(dev_list);
273 err1:
274 	pthread_mutex_unlock(&mut);
275 	return ret;
276 }
277 
rdma_get_devices(int * num_devices)278 struct ibv_context **rdma_get_devices(int *num_devices)
279 {
280 	struct ibv_context **devs = NULL;
281 	int i;
282 
283 	if (!cma_dev_cnt && ucma_init())
284 		goto out;
285 
286 	devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
287 	if (!devs)
288 		goto out;
289 
290 	for (i = 0; i < cma_dev_cnt; i++)
291 		devs[i] = cma_dev_array[i].verbs;
292 	devs[i] = NULL;
293 out:
294 	if (num_devices)
295 		*num_devices = devs ? cma_dev_cnt : 0;
296 	return devs;
297 }
298 
/*
 * Release an array returned by rdma_get_devices().  Only the array
 * itself is freed; the contexts it points to are left untouched.
 */
void rdma_free_devices(struct ibv_context **list)
{
	free(list);
}
303 
/*
 * Declared with the 'destructor' attribute: closes all devices and
 * frees the device table via ucma_cleanup().
 */
static void __attribute__((destructor)) rdma_cma_fini(void)
{
	ucma_cleanup();
}
308 
rdma_create_event_channel(void)309 struct rdma_event_channel *rdma_create_event_channel(void)
310 {
311 	struct rdma_event_channel *channel;
312 
313 	if (!cma_dev_cnt && ucma_init())
314 		return NULL;
315 
316 	channel = malloc(sizeof *channel);
317 	if (!channel)
318 		return NULL;
319 
320 	channel->fd = open("/dev/rdma_cm", O_RDWR);
321 	if (channel->fd < 0) {
322 		printf("CMA: unable to open /dev/rdma_cm\n");
323 		goto err;
324 	}
325 	return channel;
326 err:
327 	free(channel);
328 	return NULL;
329 }
330 
/*
 * Close the channel's file descriptor and free the channel.  The
 * pointer must not be used afterwards.
 */
void rdma_destroy_event_channel(struct rdma_event_channel *channel)
{
	close(channel->fd);
	free(channel);
}
336 
ucma_get_device(struct cma_id_private * id_priv,uint64_t guid)337 static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
338 {
339 	struct cma_device *cma_dev;
340 	int i;
341 
342 	for (i = 0; i < cma_dev_cnt; i++) {
343 		cma_dev = &cma_dev_array[i];
344 		if (cma_dev->guid == guid) {
345 			id_priv->cma_dev = cma_dev;
346 			id_priv->id.verbs = cma_dev->verbs;
347 			return 0;
348 		}
349 	}
350 
351 	return ERR(ENODEV);
352 }
353 
ucma_free_id(struct cma_id_private * id_priv)354 static void ucma_free_id(struct cma_id_private *id_priv)
355 {
356 	pthread_cond_destroy(&id_priv->cond);
357 	pthread_mutex_destroy(&id_priv->mut);
358 	if (id_priv->id.route.path_rec)
359 		free(id_priv->id.route.path_rec);
360 	free(id_priv);
361 }
362 
ucma_alloc_id(struct rdma_event_channel * channel,void * context,enum rdma_port_space ps)363 static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel,
364 					    void *context,
365 					    enum rdma_port_space ps)
366 {
367 	struct cma_id_private *id_priv;
368 
369 	id_priv = malloc(sizeof *id_priv);
370 	if (!id_priv)
371 		return NULL;
372 
373 	memset(id_priv, 0, sizeof *id_priv);
374 	id_priv->id.context = context;
375 	id_priv->id.ps = ps;
376 	id_priv->id.channel = channel;
377 	pthread_mutex_init(&id_priv->mut, NULL);
378 	if (pthread_cond_init(&id_priv->cond, NULL))
379 		goto err;
380 
381 	return id_priv;
382 
383 err:	ucma_free_id(id_priv);
384 	return NULL;
385 }
386 
rdma_create_id(struct rdma_event_channel * channel,struct rdma_cm_id ** id,void * context,enum rdma_port_space ps)387 int rdma_create_id(struct rdma_event_channel *channel,
388 		   struct rdma_cm_id **id, void *context,
389 		   enum rdma_port_space ps)
390 {
391 	struct ucma_abi_create_id_resp *resp;
392 	struct ucma_abi_create_id *cmd;
393 	struct cma_id_private *id_priv;
394 	void *msg;
395 	int ret, size;
396 
397 	ret = cma_dev_cnt ? 0 : ucma_init();
398 	if (ret)
399 		return ret;
400 
401 	id_priv = ucma_alloc_id(channel, context, ps);
402 	if (!id_priv)
403 		return ERR(ENOMEM);
404 
405 	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_CREATE_ID, size);
406 	cmd->uid = (uintptr_t) id_priv;
407 	cmd->ps = ps;
408 
409 	ret = write(channel->fd, msg, size);
410 	if (ret != size)
411 		goto err;
412 
413 	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
414 
415 	id_priv->handle = resp->id;
416 	*id = &id_priv->id;
417 	return 0;
418 
419 err:	ucma_free_id(id_priv);
420 	return ret;
421 }
422 
/*
 * Ask the kernel, through fd, to destroy the identifier 'handle'.
 *
 * Returns the events_reported value from the kernel's response on
 * success, or -1 on failure (errno = ECONNREFUSED when the request was
 * only partially written).
 */
static int ucma_destroy_kern_id(int fd, uint32_t handle)
{
	struct ucma_abi_destroy_id_resp *resp;
	struct ucma_abi_destroy_id *cmd;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_DESTROY_ID, size);
	cmd->id = handle;

	ret = write(fd, msg, size);
	if (ret == size) {
		VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
		return resp->events_reported;
	}

	if (ret < 0)
		return -1;
	return ERR(ECONNREFUSED);
}
441 
rdma_destroy_id(struct rdma_cm_id * id)442 int rdma_destroy_id(struct rdma_cm_id *id)
443 {
444 	struct cma_id_private *id_priv;
445 	int ret;
446 
447 	id_priv = container_of(id, struct cma_id_private, id);
448 	ret = ucma_destroy_kern_id(id->channel->fd, id_priv->handle);
449 	if (ret < 0)
450 		return ret;
451 
452 	pthread_mutex_lock(&id_priv->mut);
453 	while (id_priv->events_completed < ret)
454 		pthread_cond_wait(&id_priv->cond, &id_priv->mut);
455 	pthread_mutex_unlock(&id_priv->mut);
456 
457 	ucma_free_id(id_priv);
458 	return 0;
459 }
460 
/*
 * Size in bytes of the concrete socket address addr points to:
 * sizeof(struct sockaddr_in) for PF_INET, sizeof(struct sockaddr_in6)
 * for PF_INET6, and 0 for a NULL pointer or any other family.
 */
static int ucma_addrlen(struct sockaddr *addr)
{
	if (addr && addr->sa_family == PF_INET)
		return sizeof(struct sockaddr_in);

	if (addr && addr->sa_family == PF_INET6)
		return sizeof(struct sockaddr_in6);

	return 0;
}
475 
/*
 * Fetch the current addressing and routing information for id from the
 * kernel and copy it into id->route.
 *
 * When the response carries path records, a new array is allocated for
 * them and stored in id->route.path_rec.  The source/destination GIDs
 * and the pkey are taken from the first ib_route entry of the response,
 * and the source and destination socket addresses are copied as well.
 * If the id has no device yet and the response names a node GUID, the
 * id is bound to that device and its port number is recorded.
 *
 * Returns 0 on success; -1 on failure of the request or of the
 * allocation (errno = ECONNREFUSED for a partial write, ENOMEM for the
 * allocation), or the result of ucma_get_device() when it fails.
 */
static int ucma_query_route(struct rdma_cm_id *id)
{
	struct ucma_abi_query_route_resp *resp;
	struct ucma_abi_query_route *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size, path;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_QUERY_ROUTE, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret < 0)
		return -1;
	if (ret != size)
		return ERR(ECONNREFUSED);

	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	if (resp->num_paths) {
		id->route.path_rec = malloc(sizeof *id->route.path_rec *
					    resp->num_paths);
		if (!id->route.path_rec)
			return ERR(ENOMEM);

		id->route.num_paths = resp->num_paths;
		for (path = 0; path < resp->num_paths; path++)
			ibv_copy_path_rec_from_kern(id->route.path_rec + path,
						    resp->ib_route + path);
	}

	/* IB addressing always comes from the first route entry. */
	memcpy(id->route.addr.addr.ibaddr.sgid.raw, resp->ib_route[0].sgid,
	       sizeof id->route.addr.addr.ibaddr.sgid);
	memcpy(id->route.addr.addr.ibaddr.dgid.raw, resp->ib_route[0].dgid,
	       sizeof id->route.addr.addr.ibaddr.dgid);
	id->route.addr.addr.ibaddr.pkey = resp->ib_route[0].pkey;

	memcpy(&id->route.addr.src_addr, &resp->src_addr,
	       sizeof resp->src_addr);
	memcpy(&id->route.addr.dst_addr, &resp->dst_addr,
	       sizeof resp->dst_addr);

	/* Nothing more to do if a device is already bound or none is named. */
	if (id_priv->cma_dev || !resp->node_guid)
		return 0;

	ret = ucma_get_device(id_priv, resp->node_guid);
	if (ret)
		return ret;

	id_priv->id.port_num = resp->port_num;
	return 0;
}
525 
rdma_bind_addr(struct rdma_cm_id * id,struct sockaddr * addr)526 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
527 {
528 	struct ucma_abi_bind_addr *cmd;
529 	struct cma_id_private *id_priv;
530 	void *msg;
531 	int ret, size, addrlen;
532 
533 	addrlen = ucma_addrlen(addr);
534 	if (!addrlen)
535 		return ERR(EINVAL);
536 
537 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_BIND_ADDR, size);
538 	id_priv = container_of(id, struct cma_id_private, id);
539 	cmd->id = id_priv->handle;
540 	memcpy(&cmd->addr, addr, addrlen);
541 
542 	ret = write(id->channel->fd, msg, size);
543 	if (ret != size)
544 		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
545 
546 	return ucma_query_route(id);
547 }
548 
rdma_resolve_addr(struct rdma_cm_id * id,struct sockaddr * src_addr,struct sockaddr * dst_addr,int timeout_ms)549 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
550 		      struct sockaddr *dst_addr, int timeout_ms)
551 {
552 	struct ucma_abi_resolve_addr *cmd;
553 	struct cma_id_private *id_priv;
554 	void *msg;
555 	int ret, size, daddrlen;
556 
557 	daddrlen = ucma_addrlen(dst_addr);
558 	if (!daddrlen)
559 		return ERR(EINVAL);
560 
561 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_RESOLVE_ADDR, size);
562 	id_priv = container_of(id, struct cma_id_private, id);
563 	cmd->id = id_priv->handle;
564 	if (src_addr)
565 		memcpy(&cmd->src_addr, src_addr, ucma_addrlen(src_addr));
566 	memcpy(&cmd->dst_addr, dst_addr, daddrlen);
567 	cmd->timeout_ms = timeout_ms;
568 
569 	ret = write(id->channel->fd, msg, size);
570 	if (ret != size)
571 		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
572 
573 	memcpy(&id->route.addr.dst_addr, dst_addr, daddrlen);
574 	return 0;
575 }
576 
/*
 * Ask the kernel to resolve a route for id; timeout_ms is passed through
 * unchanged.
 *
 * Returns 0 when the request was handed to the kernel, otherwise -1
 * (errno = ECONNREFUSED for a partial write).
 */
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
	struct ucma_abi_resolve_route *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_RESOLVE_ROUTE, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->timeout_ms = timeout_ms;
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return 0;

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
595 
ucma_is_ud_ps(enum rdma_port_space ps)596 static int ucma_is_ud_ps(enum rdma_port_space ps)
597 {
598 	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
599 }
600 
/*
 * Ask the kernel for the QP attributes needed to move id's QP into the
 * state given in qp_attr->qp_state.
 *
 * On success *qp_attr is overwritten with the kernel's attributes and
 * *qp_attr_mask receives the mask of valid fields.
 *
 * Returns 0 on success, otherwise -1 (errno = ECONNREFUSED for a partial
 * write); the outputs are not written on failure.
 */
static int rdma_init_qp_attr(struct rdma_cm_id *id, struct ibv_qp_attr *qp_attr,
			     int *qp_attr_mask)
{
	struct ucma_abi_init_qp_attr *cmd;
	struct ibv_kern_qp_attr *resp;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_INIT_QP_ATTR, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;
	cmd->qp_state = qp_attr->qp_state;

	ret = write(id->channel->fd, msg, size);
	if (ret == size) {
		VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

		ibv_copy_qp_attr_from_kern(qp_attr, resp);
		*qp_attr_mask = resp->qp_attr_mask;
		return 0;
	}

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
625 
ucma_modify_qp_rtr(struct rdma_cm_id * id,struct rdma_conn_param * conn_param)626 static int ucma_modify_qp_rtr(struct rdma_cm_id *id,
627 			      struct rdma_conn_param *conn_param)
628 {
629 	struct ibv_qp_attr qp_attr;
630 	int qp_attr_mask, ret;
631 
632 	if (!id->qp)
633 		return ERR(EINVAL);
634 
635 	/* Need to update QP attributes from default values. */
636 	qp_attr.qp_state = IBV_QPS_INIT;
637 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
638 	if (ret)
639 		return ret;
640 
641 	ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
642 	if (ret)
643 		return ret;
644 
645 	qp_attr.qp_state = IBV_QPS_RTR;
646 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
647 	if (ret)
648 		return ret;
649 
650 	if (conn_param)
651 		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
652 	return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
653 }
654 
ucma_modify_qp_rts(struct rdma_cm_id * id)655 static int ucma_modify_qp_rts(struct rdma_cm_id *id)
656 {
657 	struct ibv_qp_attr qp_attr;
658 	int qp_attr_mask, ret;
659 
660 	qp_attr.qp_state = IBV_QPS_RTS;
661 	ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
662 	if (ret)
663 		return ret;
664 
665 	return ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
666 }
667 
ucma_modify_qp_sqd(struct rdma_cm_id * id)668 static int ucma_modify_qp_sqd(struct rdma_cm_id *id)
669 {
670 	struct ibv_qp_attr qp_attr;
671 
672 	if (!id->qp)
673 		return 0;
674 
675 	qp_attr.qp_state = IBV_QPS_SQD;
676 	return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
677 }
678 
ucma_modify_qp_err(struct rdma_cm_id * id)679 static int ucma_modify_qp_err(struct rdma_cm_id *id)
680 {
681 	struct ibv_qp_attr qp_attr;
682 
683 	if (!id->qp)
684 		return 0;
685 
686 	qp_attr.qp_state = IBV_QPS_ERR;
687 	return ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE);
688 }
689 
ucma_find_pkey(struct cma_device * cma_dev,uint8_t port_num,uint16_t pkey,uint16_t * pkey_index)690 static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num,
691 			  uint16_t pkey, uint16_t *pkey_index)
692 {
693 	int ret, i;
694 	uint16_t chk_pkey;
695 
696 	for (i = 0, ret = 0; !ret; i++) {
697 		ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey);
698 		if (!ret && pkey == chk_pkey) {
699 			*pkey_index = (uint16_t) i;
700 			return 0;
701 		}
702 	}
703 	return ERR(EINVAL);
704 }
705 
ucma_init_conn_qp3(struct cma_id_private * id_priv,struct ibv_qp * qp)706 static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
707 {
708 	struct ibv_qp_attr qp_attr;
709 	int ret;
710 
711 	ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
712 			     id_priv->id.route.addr.addr.ibaddr.pkey,
713 			     &qp_attr.pkey_index);
714 	if (ret)
715 		return ret;
716 
717 	qp_attr.port_num = id_priv->id.port_num;
718 	qp_attr.qp_state = IBV_QPS_INIT;
719 	qp_attr.qp_access_flags = 0;
720 
721 	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS |
722 					   IBV_QP_PKEY_INDEX | IBV_QP_PORT);
723 }
724 
ucma_init_conn_qp(struct cma_id_private * id_priv,struct ibv_qp * qp)725 static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
726 {
727 	struct ibv_qp_attr qp_attr;
728 	int qp_attr_mask, ret;
729 
730 	if (abi_ver == 3)
731 		return ucma_init_conn_qp3(id_priv, qp);
732 
733 	qp_attr.qp_state = IBV_QPS_INIT;
734 	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
735 	if (ret)
736 		return ret;
737 
738 	return ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
739 }
740 
ucma_init_ud_qp3(struct cma_id_private * id_priv,struct ibv_qp * qp)741 static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
742 {
743 	struct ibv_qp_attr qp_attr;
744 	int ret;
745 
746 	ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
747 			     id_priv->id.route.addr.addr.ibaddr.pkey,
748 			     &qp_attr.pkey_index);
749 	if (ret)
750 		return ret;
751 
752 	qp_attr.port_num = id_priv->id.port_num;
753 	qp_attr.qp_state = IBV_QPS_INIT;
754 	qp_attr.qkey = RDMA_UDP_QKEY;
755 
756 	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY |
757 					  IBV_QP_PKEY_INDEX | IBV_QP_PORT);
758 	if (ret)
759 		return ret;
760 
761 	qp_attr.qp_state = IBV_QPS_RTR;
762 	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
763 	if (ret)
764 		return ret;
765 
766 	qp_attr.qp_state = IBV_QPS_RTS;
767 	qp_attr.sq_psn = 0;
768 	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
769 }
770 
ucma_init_ud_qp(struct cma_id_private * id_priv,struct ibv_qp * qp)771 static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
772 {
773 	struct ibv_qp_attr qp_attr;
774 	int qp_attr_mask, ret;
775 
776 	if (abi_ver == 3)
777 		return ucma_init_ud_qp3(id_priv, qp);
778 
779 	qp_attr.qp_state = IBV_QPS_INIT;
780 	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
781 	if (ret)
782 		return ret;
783 
784 	ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
785 	if (ret)
786 		return ret;
787 
788 	qp_attr.qp_state = IBV_QPS_RTR;
789 	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
790 	if (ret)
791 		return ret;
792 
793 	qp_attr.qp_state = IBV_QPS_RTS;
794 	qp_attr.sq_psn = 0;
795 	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
796 }
797 
rdma_create_qp(struct rdma_cm_id * id,struct ibv_pd * pd,struct ibv_qp_init_attr * qp_init_attr)798 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
799 		   struct ibv_qp_init_attr *qp_init_attr)
800 {
801 	struct cma_id_private *id_priv;
802 	struct ibv_qp *qp;
803 	int ret;
804 
805 	id_priv = container_of(id, struct cma_id_private, id);
806 	if (id->verbs != pd->context)
807 		return ERR(EINVAL);
808 
809 	qp = ibv_create_qp(pd, qp_init_attr);
810 	if (!qp)
811 		return ERR(ENOMEM);
812 
813 	if (ucma_is_ud_ps(id->ps))
814 		ret = ucma_init_ud_qp(id_priv, qp);
815 	else
816 		ret = ucma_init_conn_qp(id_priv, qp);
817 	if (ret)
818 		goto err;
819 
820 	id->qp = qp;
821 	return 0;
822 err:
823 	ibv_destroy_qp(qp);
824 	return ret;
825 }
826 
rdma_destroy_qp(struct rdma_cm_id * id)827 void rdma_destroy_qp(struct rdma_cm_id *id)
828 {
829 	ibv_destroy_qp(id->qp);
830 }
831 
ucma_valid_param(struct cma_id_private * id_priv,struct rdma_conn_param * conn_param)832 static int ucma_valid_param(struct cma_id_private *id_priv,
833 			    struct rdma_conn_param *conn_param)
834 {
835 	if (id_priv->id.ps != RDMA_PS_TCP)
836 		return 0;
837 
838 	if ((conn_param->responder_resources >
839 	     id_priv->cma_dev->max_responder_resources) ||
840 	    (conn_param->initiator_depth >
841 	     id_priv->cma_dev->max_initiator_depth))
842 		return ERR(EINVAL);
843 
844 	return 0;
845 }
846 
ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param * dst,struct rdma_conn_param * src,uint32_t qp_num,uint8_t srq)847 static void ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param *dst,
848 					 struct rdma_conn_param *src,
849 					 uint32_t qp_num, uint8_t srq)
850 {
851 	dst->qp_num = qp_num;
852 	dst->srq = srq;
853 	dst->responder_resources = src->responder_resources;
854 	dst->initiator_depth = src->initiator_depth;
855 	dst->flow_control = src->flow_control;
856 	dst->retry_count = src->retry_count;
857 	dst->rnr_retry_count = src->rnr_retry_count;
858 	dst->valid = 1;
859 
860 	if (src->private_data && src->private_data_len) {
861 		memcpy(dst->private_data, src->private_data,
862 		       src->private_data_len);
863 		dst->private_data_len = src->private_data_len;
864 	}
865 }
866 
/*
 * Initiate a connection on id using conn_param.
 *
 * The QP number and SRQ flag sent to the kernel come from id->qp when
 * the id has a QP, and from conn_param otherwise.
 *
 * Returns 0 when the request was handed to the kernel; the result of
 * ucma_valid_param() when the parameters are rejected; -1 when the
 * request cannot be written (errno = ECONNREFUSED for a partial write).
 */
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_connect *cmd;
	struct cma_id_private *id_priv;
	uint32_t qp_num;
	uint8_t srq;
	void *msg;
	int ret, size;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_CONNECT, size);
	cmd->id = id_priv->handle;

	if (id->qp) {
		qp_num = id->qp->qp_num;
		srq = (id->qp->srq != NULL);
	} else {
		qp_num = conn_param->qp_num;
		srq = conn_param->srq;
	}
	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param, qp_num, srq);

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return 0;

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
896 
/*
 * Put id into the listening state with the given backlog, then refresh
 * id->route from the kernel.
 *
 * Returns 0 on success; -1 when the request cannot be written
 * (errno = ECONNREFUSED for a partial write); otherwise the result of
 * ucma_query_route().
 */
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
	struct ucma_abi_listen *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_LISTEN, size);
	id_priv = container_of(id, struct cma_id_private, id);
	cmd->backlog = backlog;
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return ucma_query_route(id);

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
915 
/*
 * Accept a connection request on id using conn_param.
 *
 * For non-datagram port spaces the id's QP is first moved to RTR.  The
 * QP number and SRQ flag sent to the kernel come from id->qp when the id
 * has a QP, and from conn_param otherwise.  If the request cannot be
 * written, the QP (if any) is moved to the error state.
 *
 * Returns 0 on success; the result of ucma_valid_param() or
 * ucma_modify_qp_rtr() when those fail; -1 when the request cannot be
 * written (errno = ECONNREFUSED for a partial write).
 */
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
	struct ucma_abi_accept *cmd;
	struct cma_id_private *id_priv;
	uint32_t qp_num;
	uint8_t srq;
	void *msg;
	int ret, size;

	id_priv = container_of(id, struct cma_id_private, id);
	ret = ucma_valid_param(id_priv, conn_param);
	if (ret)
		return ret;

	if (!ucma_is_ud_ps(id->ps)) {
		ret = ucma_modify_qp_rtr(id, conn_param);
		if (ret)
			return ret;
	}

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
	cmd->id = id_priv->handle;
	cmd->uid = (uintptr_t) id_priv;

	if (id->qp) {
		qp_num = id->qp->qp_num;
		srq = (id->qp->srq != NULL);
	} else {
		qp_num = conn_param->qp_num;
		srq = conn_param->srq;
	}
	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param, qp_num, srq);

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return 0;

	ucma_modify_qp_err(id);
	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
954 
/*
 * Reject a connection request on id, optionally passing
 * private_data_len bytes of private_data to the peer.  Private data is
 * sent only when both a buffer and a non-zero length are given.
 *
 * Returns 0 when the request was handed to the kernel, otherwise -1
 * (errno = ECONNREFUSED for a partial write).
 */
int rdma_reject(struct rdma_cm_id *id, const void *private_data,
		uint8_t private_data_len)
{
	struct ucma_abi_reject *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_REJECT, size);

	id_priv = container_of(id, struct cma_id_private, id);
	cmd->id = id_priv->handle;

	if (!private_data || !private_data_len) {
		cmd->private_data_len = 0;
	} else {
		memcpy(cmd->private_data, private_data, private_data_len);
		cmd->private_data_len = private_data_len;
	}

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return 0;

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
979 
/*
 * Forward the verbs asynchronous event 'event' for id to the kernel.
 *
 * Returns 0 when the request was handed to the kernel, otherwise -1
 * (errno = ECONNREFUSED for a partial write).
 */
int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event)
{
	struct ucma_abi_notify *cmd;
	struct cma_id_private *id_priv;
	void *msg;
	int ret, size;

	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_NOTIFY, size);

	id_priv = container_of(id, struct cma_id_private, id);
	cmd->event = event;
	cmd->id = id_priv->handle;

	ret = write(id->channel->fd, msg, size);
	if (ret == size)
		return 0;

	return (ret < 0) ? -1 : ERR(ECONNREFUSED);
}
998 
rdma_disconnect(struct rdma_cm_id * id)999 int rdma_disconnect(struct rdma_cm_id *id)
1000 {
1001 	struct ucma_abi_disconnect *cmd;
1002 	struct cma_id_private *id_priv;
1003 	void *msg;
1004 	int ret, size;
1005 
1006 	switch (id->verbs->device->transport_type) {
1007 	case IBV_TRANSPORT_IB:
1008 		ret = ucma_modify_qp_err(id);
1009 		break;
1010 	case IBV_TRANSPORT_IWARP:
1011 		ret = ucma_modify_qp_sqd(id);
1012 		break;
1013 	default:
1014 		ret = ERR(EINVAL);
1015 	}
1016 	if (ret)
1017 		return ret;
1018 
1019 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_DISCONNECT, size);
1020 	id_priv = container_of(id, struct cma_id_private, id);
1021 	cmd->id = id_priv->handle;
1022 
1023 	ret = write(id->channel->fd, msg, size);
1024 	if (ret != size)
1025 		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1026 
1027 	return 0;
1028 }
1029 
rdma_join_multicast(struct rdma_cm_id * id,struct sockaddr * addr,void * context)1030 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
1031 			void *context)
1032 {
1033 	struct ucma_abi_join_mcast *cmd;
1034 	struct ucma_abi_create_id_resp *resp;
1035 	struct cma_id_private *id_priv;
1036 	struct cma_multicast *mc, **pos;
1037 	void *msg;
1038 	int ret, size, addrlen;
1039 
1040 	id_priv = container_of(id, struct cma_id_private, id);
1041 	addrlen = ucma_addrlen(addr);
1042 	if (!addrlen)
1043 		return ERR(EINVAL);
1044 
1045 	mc = malloc(sizeof *mc);
1046 	if (!mc)
1047 		return ERR(ENOMEM);
1048 
1049 	memset(mc, 0, sizeof *mc);
1050 	mc->context = context;
1051 	mc->id_priv = id_priv;
1052 	memcpy(&mc->addr, addr, addrlen);
1053 	if (pthread_cond_init(&mc->cond, NULL)) {
1054 		ret = -1;
1055 		goto err1;
1056 	}
1057 
1058 	pthread_mutex_lock(&id_priv->mut);
1059 	mc->next = id_priv->mc_list;
1060 	id_priv->mc_list = mc;
1061 	pthread_mutex_unlock(&id_priv->mut);
1062 
1063 	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_JOIN_MCAST, size);
1064 	cmd->id = id_priv->handle;
1065 	memcpy(&cmd->addr, addr, addrlen);
1066 	cmd->uid = (uintptr_t) mc;
1067 
1068 	ret = write(id->channel->fd, msg, size);
1069 	if (ret != size) {
1070 		ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1071 		goto err2;
1072 	}
1073 
1074 	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
1075 
1076 	mc->handle = resp->id;
1077 	return 0;
1078 err2:
1079 	pthread_mutex_lock(&id_priv->mut);
1080 	for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next)
1081 		;
1082 	*pos = mc->next;
1083 	pthread_mutex_unlock(&id_priv->mut);
1084 err1:
1085 	free(mc);
1086 	return ret;
1087 }
1088 
rdma_leave_multicast(struct rdma_cm_id * id,struct sockaddr * addr)1089 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
1090 {
1091 	struct ucma_abi_destroy_id *cmd;
1092 	struct ucma_abi_destroy_id_resp *resp;
1093 	struct cma_id_private *id_priv;
1094 	struct cma_multicast *mc, **pos;
1095 	void *msg;
1096 	int ret, size, addrlen;
1097 
1098 	addrlen = ucma_addrlen(addr);
1099 	if (!addrlen)
1100 		return ERR(EINVAL);
1101 
1102 	id_priv = container_of(id, struct cma_id_private, id);
1103 	pthread_mutex_lock(&id_priv->mut);
1104 	for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next)
1105 		if (!memcmp(&(*pos)->addr, addr, addrlen))
1106 			break;
1107 
1108 	mc = *pos;
1109 	if (*pos)
1110 		*pos = mc->next;
1111 	pthread_mutex_unlock(&id_priv->mut);
1112 	if (!mc)
1113 		return ERR(EADDRNOTAVAIL);
1114 
1115 	if (id->qp)
1116 		ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid);
1117 
1118 	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_LEAVE_MCAST, size);
1119 	cmd->id = mc->handle;
1120 
1121 	ret = write(id->channel->fd, msg, size);
1122 	if (ret != size) {
1123 		ret = (ret >= 0) ? ERR(ECONNREFUSED) : -1;
1124 		goto free;
1125 	}
1126 
1127 	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);
1128 
1129 	pthread_mutex_lock(&id_priv->mut);
1130 	while (mc->events_completed < resp->events_reported)
1131 		pthread_cond_wait(&mc->cond, &id_priv->mut);
1132 	pthread_mutex_unlock(&id_priv->mut);
1133 
1134 	ret = 0;
1135 free:
1136 	free(mc);
1137 	return ret;
1138 }
1139 
ucma_complete_event(struct cma_id_private * id_priv)1140 static void ucma_complete_event(struct cma_id_private *id_priv)
1141 {
1142 	pthread_mutex_lock(&id_priv->mut);
1143 	id_priv->events_completed++;
1144 	pthread_cond_signal(&id_priv->cond);
1145 	pthread_mutex_unlock(&id_priv->mut);
1146 }
1147 
ucma_complete_mc_event(struct cma_multicast * mc)1148 static void ucma_complete_mc_event(struct cma_multicast *mc)
1149 {
1150 	pthread_mutex_lock(&mc->id_priv->mut);
1151 	mc->events_completed++;
1152 	pthread_cond_signal(&mc->cond);
1153 	mc->id_priv->events_completed++;
1154 	pthread_cond_signal(&mc->id_priv->cond);
1155 	pthread_mutex_unlock(&mc->id_priv->mut);
1156 }
1157 
rdma_ack_cm_event(struct rdma_cm_event * event)1158 int rdma_ack_cm_event(struct rdma_cm_event *event)
1159 {
1160 	struct cma_event *evt;
1161 
1162 	if (!event)
1163 		return ERR(EINVAL);
1164 
1165 	evt = container_of(event, struct cma_event, event);
1166 
1167 	if (evt->mc)
1168 		ucma_complete_mc_event(evt->mc);
1169 	else
1170 		ucma_complete_event(evt->id_priv);
1171 	free(evt);
1172 	return 0;
1173 }
1174 
ucma_process_conn_req(struct cma_event * evt,uint32_t handle)1175 static int ucma_process_conn_req(struct cma_event *evt,
1176 				 uint32_t handle)
1177 {
1178 	struct cma_id_private *id_priv;
1179 	int ret;
1180 
1181 	id_priv = ucma_alloc_id(evt->id_priv->id.channel,
1182 				evt->id_priv->id.context, evt->id_priv->id.ps);
1183 	if (!id_priv) {
1184 		ucma_destroy_kern_id(evt->id_priv->id.channel->fd, handle);
1185 		ret = ERR(ENOMEM);
1186 		goto err;
1187 	}
1188 
1189 	evt->event.listen_id = &evt->id_priv->id;
1190 	evt->event.id = &id_priv->id;
1191 	id_priv->handle = handle;
1192 
1193 	ret = ucma_query_route(&id_priv->id);
1194 	if (ret) {
1195 		rdma_destroy_id(&id_priv->id);
1196 		goto err;
1197 	}
1198 
1199 	return 0;
1200 err:
1201 	ucma_complete_event(evt->id_priv);
1202 	return ret;
1203 }
1204 
/*
 * Handle a connect response for @id_priv: run the RTR and RTS QP helpers,
 * then write a UCMA_CMD_ACCEPT command carrying the id's handle.
 *
 * If any step fails, ucma_modify_qp_err() is called on the id before the
 * error is returned.
 *
 * Returns 0 on success, the failing QP helper's result, -1 when write()
 * fails, or ERR(ECONNREFUSED) when the write is short.
 */
static int ucma_process_conn_resp(struct cma_id_private *id_priv)
{
	struct rdma_cm_id *id = &id_priv->id;
	struct ucma_abi_accept *req;
	void *buf;
	int rc, len;

	rc = ucma_modify_qp_rtr(id, NULL);
	if (!rc)
		rc = ucma_modify_qp_rts(id);

	if (!rc) {
		CMA_CREATE_MSG_CMD(buf, req, UCMA_CMD_ACCEPT, len);
		req->id = id_priv->handle;

		rc = write(id->channel->fd, buf, len);
		if (rc == len)
			return 0;

		rc = (rc < 0) ? -1 : ERR(ECONNREFUSED);
	}

	ucma_modify_qp_err(id);
	return rc;
}
1233 
/*
 * Run the RTS QP helper for @id; if it fails, run the error QP helper.
 * Returns the ucma_modify_qp_rts() result (0 on success).
 */
static int ucma_process_establish(struct rdma_cm_id *id)
{
	int rc = ucma_modify_qp_rts(id);

	if (!rc)
		return 0;

	ucma_modify_qp_err(id);
	return rc;
}
1244 
ucma_process_join(struct cma_event * evt)1245 static int ucma_process_join(struct cma_event *evt)
1246 {
1247 	evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid;
1248 	evt->mc->mlid = evt->event.param.ud.ah_attr.dlid;
1249 
1250 	if (!evt->id_priv->id.qp)
1251 		return 0;
1252 
1253 	return ibv_attach_mcast(evt->id_priv->id.qp, &evt->mc->mgid,
1254 				evt->mc->mlid);
1255 }
1256 
ucma_copy_conn_event(struct cma_event * event,struct ucma_abi_conn_param * src)1257 static void ucma_copy_conn_event(struct cma_event *event,
1258 				 struct ucma_abi_conn_param *src)
1259 {
1260 	struct rdma_conn_param *dst = &event->event.param.conn;
1261 
1262 	dst->private_data_len = src->private_data_len;
1263 	if (src->private_data_len) {
1264 		dst->private_data = &event->private_data;
1265 		memcpy(&event->private_data, src->private_data,
1266 		       src->private_data_len);
1267 	}
1268 
1269 	dst->responder_resources = src->responder_resources;
1270 	dst->initiator_depth = src->initiator_depth;
1271 	dst->flow_control = src->flow_control;
1272 	dst->retry_count = src->retry_count;
1273 	dst->rnr_retry_count = src->rnr_retry_count;
1274 	dst->srq = src->srq;
1275 	dst->qp_num = src->qp_num;
1276 }
1277 
ucma_copy_ud_event(struct cma_event * event,struct ucma_abi_ud_param * src)1278 static void ucma_copy_ud_event(struct cma_event *event,
1279 			       struct ucma_abi_ud_param *src)
1280 {
1281 	struct rdma_ud_param *dst = &event->event.param.ud;
1282 
1283 	dst->private_data_len = src->private_data_len;
1284 	if (src->private_data_len) {
1285 		dst->private_data = &event->private_data;
1286 		memcpy(&event->private_data, src->private_data,
1287 		       src->private_data_len);
1288 	}
1289 
1290 	ibv_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr);
1291 	dst->qp_num = src->qp_num;
1292 	dst->qkey = src->qkey;
1293 }
1294 
/*
 * rdma_get_cm_event - fetch the next communication event from @channel.
 * @channel: event channel; the UCMA_CMD_GET_EVENT command is written to
 *           its fd.
 * @event:   out parameter.  On success it receives a pointer to the
 *           rdma_cm_event embedded in a heap-allocated struct cma_event
 *           (rdma_ack_cm_event() frees that container).
 *
 * After the raw event is read, per-event-type processing may rewrite the
 * event type and status (for example a failed route query turns
 * ADDR_RESOLVED into ADDR_ERROR).  Some events are consumed here and never
 * returned to the caller; in that case the function jumps back to "retry"
 * and fetches another event.
 *
 * Returns 0 on success, the ucma_init() result if initialisation fails,
 * ERR(EINVAL) if @event is NULL, ERR(ENOMEM) if the event cannot be
 * allocated, -1 if write() fails, and ERR(ECONNREFUSED) if the write is
 * short.
 */
int rdma_get_cm_event(struct rdma_event_channel *channel,
		      struct rdma_cm_event **event)
{
	struct ucma_abi_event_resp *resp;
	struct ucma_abi_get_event *cmd;
	struct cma_event *evt;
	void *msg;
	int ret, size;

	/* ucma_init() is only invoked while cma_dev_cnt is still zero. */
	ret = cma_dev_cnt ? 0 : ucma_init();
	if (ret)
		return ret;

	if (!event)
		return ERR(EINVAL);

	evt = malloc(sizeof *evt);
	if (!evt)
		return ERR(ENOMEM);

retry:
	/* Start from a clean event every time, including after a retry. */
	memset(evt, 0, sizeof *evt);
	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_GET_EVENT, size);
	ret = write(channel->fd, msg, size);
	if (ret != size) {
		free(evt);
		return (ret >= 0) ? ERR(ECONNREFUSED) : -1;
	}

	/*
	 * NOTE(review): resp is presumably filled in by the kernel during
	 * write(), outside valgrind's view -- hence the explicit marking.
	 */
	VALGRIND_MAKE_MEM_DEFINED(resp, sizeof *resp);

	/*
	 * Default interpretation: uid is a pointer to the cma_id_private the
	 * event belongs to.  The multicast cases below re-interpret it as a
	 * pointer to a cma_multicast instead.
	 */
	evt->event.event = resp->event;
	evt->id_priv = (void *) (uintptr_t) resp->uid;
	evt->event.id = &evt->id_priv->id;
	evt->event.status = resp->status;

	switch (resp->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		/* A failed route query downgrades the event to ADDR_ERROR. */
		evt->event.status = ucma_query_route(&evt->id_priv->id);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ADDR_ERROR;
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		/* A failed route query downgrades the event to ROUTE_ERROR. */
		evt->event.status = ucma_query_route(&evt->id_priv->id);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
		break;
	case RDMA_CM_EVENT_CONNECT_REQUEST:
		/* Same value as assigned before the switch. */
		evt->id_priv = (void *) (uintptr_t) resp->uid;
		if (ucma_is_ud_ps(evt->id_priv->id.ps))
			ucma_copy_ud_event(evt, &resp->param.ud);
		else
			ucma_copy_conn_event(evt, &resp->param.conn);

		/*
		 * On failure ucma_process_conn_req() has already completed
		 * the event on the receiving id, so the request is dropped
		 * and the next event is fetched.
		 */
		ret = ucma_process_conn_req(evt, resp->id);
		if (ret)
			goto retry;
		break;
	case RDMA_CM_EVENT_CONNECT_RESPONSE:
		/*
		 * The caller never sees CONNECT_RESPONSE: it is reported as
		 * ESTABLISHED when processing succeeds and as CONNECT_ERROR
		 * (with connect_error latched on the id) when it fails.
		 */
		ucma_copy_conn_event(evt, &resp->param.conn);
		evt->event.status = ucma_process_conn_resp(evt->id_priv);
		if (!evt->event.status)
			evt->event.event = RDMA_CM_EVENT_ESTABLISHED;
		else {
			evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			evt->id_priv->connect_error = 1;
		}
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		/* UD port spaces only need the parameters copied. */
		if (ucma_is_ud_ps(evt->id_priv->id.ps)) {
			ucma_copy_ud_event(evt, &resp->param.ud);
			break;
		}

		ucma_copy_conn_event(evt, &resp->param.conn);
		evt->event.status = ucma_process_establish(&evt->id_priv->id);
		if (evt->event.status) {
			evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
			evt->id_priv->connect_error = 1;
		}
		break;
	case RDMA_CM_EVENT_REJECTED:
		/*
		 * connect_error is set above when a CONNECT_ERROR was
		 * synthesised for this id; in that case the event is
		 * completed here and not returned to the caller.
		 */
		if (evt->id_priv->connect_error) {
			ucma_complete_event(evt->id_priv);
			goto retry;
		}
		ucma_copy_conn_event(evt, &resp->param.conn);
		ucma_modify_qp_err(evt->event.id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		/* Same suppression as for REJECTED. */
		if (evt->id_priv->connect_error) {
			ucma_complete_event(evt->id_priv);
			goto retry;
		}
		ucma_copy_conn_event(evt, &resp->param.conn);
		break;
	case RDMA_CM_EVENT_MULTICAST_JOIN:
		/*
		 * For multicast events uid is the cma_multicast entry; the
		 * owning id is reached through it.
		 */
		evt->mc = (void *) (uintptr_t) resp->uid;
		evt->id_priv = evt->mc->id_priv;
		evt->event.id = &evt->id_priv->id;
		ucma_copy_ud_event(evt, &resp->param.ud);
		/* private_data is replaced by the context stored in the entry. */
		evt->event.param.ud.private_data = evt->mc->context;
		evt->event.status = ucma_process_join(evt);
		if (evt->event.status)
			evt->event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
		break;
	case RDMA_CM_EVENT_MULTICAST_ERROR:
		evt->mc = (void *) (uintptr_t) resp->uid;
		evt->id_priv = evt->mc->id_priv;
		evt->event.id = &evt->id_priv->id;
		evt->event.param.ud.private_data = evt->mc->context;
		break;
	default:
		/*
		 * The first three assignments repeat the values set before
		 * the switch; only the parameter copy is new.
		 */
		evt->id_priv = (void *) (uintptr_t) resp->uid;
		evt->event.id = &evt->id_priv->id;
		evt->event.status = resp->status;
		if (ucma_is_ud_ps(evt->id_priv->id.ps))
			ucma_copy_ud_event(evt, &resp->param.ud);
		else
			ucma_copy_conn_event(evt, &resp->param.conn);
		break;
	}

	*event = &evt->event;
	return 0;
}
1421 
rdma_event_str(enum rdma_cm_event_type event)1422 const char *rdma_event_str(enum rdma_cm_event_type event)
1423 {
1424 	switch (event) {
1425 	case RDMA_CM_EVENT_ADDR_RESOLVED:
1426 		return "RDMA_CM_EVENT_ADDR_RESOLVED";
1427 	case RDMA_CM_EVENT_ADDR_ERROR:
1428 		return "RDMA_CM_EVENT_ADDR_ERROR";
1429 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
1430 		return "RDMA_CM_EVENT_ROUTE_RESOLVED";
1431 	case RDMA_CM_EVENT_ROUTE_ERROR:
1432 		return "RDMA_CM_EVENT_ROUTE_ERROR";
1433 	case RDMA_CM_EVENT_CONNECT_REQUEST:
1434 		return "RDMA_CM_EVENT_CONNECT_REQUEST";
1435 	case RDMA_CM_EVENT_CONNECT_RESPONSE:
1436 		return "RDMA_CM_EVENT_CONNECT_RESPONSE";
1437 	case RDMA_CM_EVENT_CONNECT_ERROR:
1438 		return "RDMA_CM_EVENT_CONNECT_ERROR";
1439 	case RDMA_CM_EVENT_UNREACHABLE:
1440 		return "RDMA_CM_EVENT_UNREACHABLE";
1441 	case RDMA_CM_EVENT_REJECTED:
1442 		return "RDMA_CM_EVENT_REJECTED";
1443 	case RDMA_CM_EVENT_ESTABLISHED:
1444 		return "RDMA_CM_EVENT_ESTABLISHED";
1445 	case RDMA_CM_EVENT_DISCONNECTED:
1446 		return "RDMA_CM_EVENT_DISCONNECTED";
1447 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
1448 		return "RDMA_CM_EVENT_DEVICE_REMOVAL";
1449 	case RDMA_CM_EVENT_MULTICAST_JOIN:
1450 		return "RDMA_CM_EVENT_MULTICAST_JOIN";
1451 	case RDMA_CM_EVENT_MULTICAST_ERROR:
1452 		return "RDMA_CM_EVENT_MULTICAST_ERROR";
1453 	case RDMA_CM_EVENT_ADDR_CHANGE:
1454 		return "RDMA_CM_EVENT_ADDR_CHANGE";
1455 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
1456 		return "RDMA_CM_EVENT_TIMEWAIT_EXIT";
1457 	default:
1458 		return "UNKNOWN EVENT";
1459 	}
1460 }
1461 
/*
 * rdma_set_option - send an option setting for @id down the event channel.
 * @id:      communication identifier the option applies to.
 * @level:   option level, stored in the command as given.
 * @optname: option name, stored in the command as given.
 * @optval:  pointer to the option value; its address is placed in the
 *           command.
 * @optlen:  size of the option value, stored in the command as given.
 *
 * Builds a UCMA_CMD_SET_OPTION command and writes it to the fd of the id's
 * event channel.
 *
 * Returns 0 when the whole command was written, -1 when write() fails, and
 * ERR(ECONNREFUSED) when the write is short.
 */
int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
		    void *optval, size_t optlen)
{
	struct cma_id_private *priv = container_of(id, struct cma_id_private, id);
	struct ucma_abi_set_option *req;
	void *buf;
	int len, written;

	CMA_CREATE_MSG_CMD(buf, req, UCMA_CMD_SET_OPTION, len);
	req->id = priv->handle;
	req->optval = (uintptr_t) optval;
	req->level = level;
	req->optname = optname;
	req->optlen = optlen;

	written = write(id->channel->fd, buf, len);
	if (written == len)
		return 0;

	return (written < 0) ? -1 : ERR(ECONNREFUSED);
}
1484 
/*
 * rdma_migrate_id - move @id to a different event channel.
 * @id:      communication identifier to move.
 * @channel: destination event channel.
 *
 * Writes a UCMA_CMD_MIGRATE_ID command, carrying the id's handle and the fd
 * of its current channel, to the fd of the destination channel.  Then,
 * under the id's mutex, switches id->channel to @channel and waits until
 * the id's completed event count reaches the events_reported value from the
 * response.
 *
 * Returns 0 on success, -1 when write() fails, and ERR(ECONNREFUSED) when
 * the write is short.
 */
int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
{
	struct cma_id_private *priv = container_of(id, struct cma_id_private, id);
	struct ucma_abi_migrate_resp *reply;
	struct ucma_abi_migrate_id *req;
	void *buf;
	int len, written;

	CMA_CREATE_MSG_CMD_RESP(buf, req, reply, UCMA_CMD_MIGRATE_ID, len);
	req->id = priv->handle;
	req->fd = id->channel->fd;

	written = write(channel->fd, buf, len);
	if (written != len) {
		if (written < 0)
			return -1;
		return ERR(ECONNREFUSED);
	}

	VALGRIND_MAKE_MEM_DEFINED(reply, sizeof *reply);

	/*
	 * Migrating while events are still being processed on the current
	 * channel is not supported yet.  Supporting it would mean blocking
	 * here while this id has outstanding events on the current channel,
	 * so that the user cannot process events for this id on the old
	 * channel once this call has returned.
	 */
	pthread_mutex_lock(&priv->mut);
	id->channel = channel;
	while (priv->events_completed < reply->events_reported)
		pthread_cond_wait(&priv->cond, &priv->mut);
	pthread_mutex_unlock(&priv->mut);

	return 0;
}
1519