xref: /NextBSD/sys/ofed/drivers/infiniband/core/uverbs_main.c (revision 4557fabb34e865d7f40be64b39c9e34fa41dbb60)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6  * Copyright (c) 2005 PathScale, Inc. All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  */
36 
37 #include <linux/module.h>
38 #include <linux/device.h>
39 #include <linux/err.h>
40 #include <linux/fs.h>
41 #include <linux/poll.h>
42 #include <linux/sched.h>
43 #include <linux/file.h>
44 #include <linux/cdev.h>
45 #include <linux/slab.h>
46 #include <linux/ktime.h>
47 #include <linux/rbtree.h>
48 #include <linux/math64.h>
49 
50 #include <asm/uaccess.h>
51 
52 #include "uverbs.h"
53 
54 MODULE_AUTHOR("Roland Dreier");
55 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
56 MODULE_LICENSE("Dual BSD/GPL");
57 
/*
 * Character-device numbering constants for the uverbs device nodes:
 * the major number, the first minor number and the maximum number of
 * devices (also the size of the dev_map bitmap in this file).
 */
enum {
	IB_UVERBS_MAJOR		= 231,
	IB_UVERBS_BASE_MINOR	= 192,
	IB_UVERBS_MAX_DEVICES	= 32
};

/* dev_t built from the major number and the base minor number. */
#define IB_UVERBS_BASE_DEV \
	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
65 
/*
 * Copy from the user-space input buffer described by @udata into @dest.
 * The number of bytes copied is the smaller of @len and udata->inlen.
 *
 * Returns 0 on success, or -EFAULT if copy_from_user() could not copy
 * everything.
 */
static int uverbs_copy_from_udata_ex(void *dest, struct ib_udata *udata, size_t len)
{
	if (copy_from_user(dest, udata->inbuf, min(udata->inlen, len)))
		return -EFAULT;

	return 0;
}
70 
/*
 * Copy from @src into the user-space output buffer described by @udata.
 * The number of bytes copied is the smaller of @len and udata->outlen.
 *
 * Returns 0 on success, or -EFAULT if copy_to_user() could not copy
 * everything.
 */
static int uverbs_copy_to_udata_ex(struct ib_udata *udata, void *src, size_t len)
{
	if (copy_to_user(udata->outbuf, src, min(udata->outlen, len)))
		return -EFAULT;

	return 0;
}
75 
76 static struct ib_udata_ops uverbs_copy_ex = {
77 	.copy_from = uverbs_copy_from_udata_ex,
78 	.copy_to   = uverbs_copy_to_udata_ex
79 };
80 
/*
 * Initialise @udata for an extended/experimental command: install the
 * uverbs_copy_ex operations and record the user input buffer @ibuf of
 * @ilen bytes and the user output buffer @obuf of @olen bytes.  The
 * buffer arguments are converted to user pointers through unsigned long.
 */
#define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen)			\
	do {								\
		(udata)->ops	= &uverbs_copy_ex;			\
		(udata)->inbuf	= (void __user *)(unsigned long)(ibuf);	\
		(udata)->outbuf	= (void __user *)(unsigned long)(obuf);	\
		(udata)->inlen	= (ilen);				\
		(udata)->outlen	= (olen);				\
	} while (0)
89 
90 
91 static struct class *uverbs_class;
92 
93 DEFINE_SPINLOCK(ib_uverbs_idr_lock);
94 DEFINE_IDR(ib_uverbs_pd_idr);
95 DEFINE_IDR(ib_uverbs_mr_idr);
96 DEFINE_IDR(ib_uverbs_mw_idr);
97 DEFINE_IDR(ib_uverbs_ah_idr);
98 DEFINE_IDR(ib_uverbs_cq_idr);
99 DEFINE_IDR(ib_uverbs_qp_idr);
100 DEFINE_IDR(ib_uverbs_srq_idr);
101 DEFINE_IDR(ib_uverbs_xrcd_idr);
102 DEFINE_IDR(ib_uverbs_rule_idr);
103 DEFINE_IDR(ib_uverbs_dct_idr);
104 
105 static DEFINE_SPINLOCK(map_lock);
106 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
107 
108 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
109 				     const char __user *buf, int in_len,
110 				     int out_len) = {
111 	[IB_USER_VERBS_CMD_GET_CONTEXT]   	= ib_uverbs_get_context,
112 	[IB_USER_VERBS_CMD_QUERY_DEVICE]  	= ib_uverbs_query_device,
113 	[IB_USER_VERBS_CMD_QUERY_PORT]    	= ib_uverbs_query_port,
114 	[IB_USER_VERBS_CMD_ALLOC_PD]      	= ib_uverbs_alloc_pd,
115 	[IB_USER_VERBS_CMD_DEALLOC_PD]    	= ib_uverbs_dealloc_pd,
116 	[IB_USER_VERBS_CMD_REG_MR]        	= ib_uverbs_reg_mr,
117 	[IB_USER_VERBS_CMD_DEREG_MR]      	= ib_uverbs_dereg_mr,
118 	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
119 	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
120 	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
121 	[IB_USER_VERBS_CMD_CREATE_CQ]     	= ib_uverbs_create_cq,
122 	[IB_USER_VERBS_CMD_RESIZE_CQ]     	= ib_uverbs_resize_cq,
123 	[IB_USER_VERBS_CMD_POLL_CQ]     	= ib_uverbs_poll_cq,
124 	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]     	= ib_uverbs_req_notify_cq,
125 	[IB_USER_VERBS_CMD_DESTROY_CQ]    	= ib_uverbs_destroy_cq,
126 	[IB_USER_VERBS_CMD_CREATE_QP]     	= ib_uverbs_create_qp,
127 	[IB_USER_VERBS_CMD_QUERY_QP]     	= ib_uverbs_query_qp,
128 	[IB_USER_VERBS_CMD_MODIFY_QP]     	= ib_uverbs_modify_qp,
129 	[IB_USER_VERBS_CMD_DESTROY_QP]    	= ib_uverbs_destroy_qp,
130 	[IB_USER_VERBS_CMD_POST_SEND]    	= ib_uverbs_post_send,
131 	[IB_USER_VERBS_CMD_POST_RECV]    	= ib_uverbs_post_recv,
132 	[IB_USER_VERBS_CMD_POST_SRQ_RECV]    	= ib_uverbs_post_srq_recv,
133 	[IB_USER_VERBS_CMD_CREATE_AH]    	= ib_uverbs_create_ah,
134 	[IB_USER_VERBS_CMD_DESTROY_AH]    	= ib_uverbs_destroy_ah,
135 	[IB_USER_VERBS_CMD_ATTACH_MCAST]  	= ib_uverbs_attach_mcast,
136 	[IB_USER_VERBS_CMD_DETACH_MCAST]  	= ib_uverbs_detach_mcast,
137 	[IB_USER_VERBS_CMD_CREATE_SRQ]    	= ib_uverbs_create_srq,
138 	[IB_USER_VERBS_CMD_MODIFY_SRQ]    	= ib_uverbs_modify_srq,
139 	[IB_USER_VERBS_CMD_QUERY_SRQ]     	= ib_uverbs_query_srq,
140 	[IB_USER_VERBS_CMD_DESTROY_SRQ]   	= ib_uverbs_destroy_srq,
141 	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
142 	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
143 	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
144 	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp,
145 };
146 
147 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
148 				    struct ib_udata *ucore,
149 				    struct ib_udata *uhw) = {
150 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
151 	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
152 };
153 
154 static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file,
155 					 struct ib_udata *ucore,
156 					 struct ib_udata *uhw) = {
157 	[IB_USER_VERBS_EXP_CMD_CREATE_QP]	= ib_uverbs_exp_create_qp,
158 	[IB_USER_VERBS_EXP_CMD_MODIFY_CQ]	= ib_uverbs_exp_modify_cq,
159 	[IB_USER_VERBS_EXP_CMD_MODIFY_QP]	= ib_uverbs_exp_modify_qp,
160 	[IB_USER_VERBS_EXP_CMD_CREATE_CQ]	= ib_uverbs_exp_create_cq,
161 	[IB_USER_VERBS_EXP_CMD_QUERY_DEVICE]	= ib_uverbs_exp_query_device,
162 	[IB_USER_VERBS_EXP_CMD_CREATE_DCT]	= ib_uverbs_exp_create_dct,
163 	[IB_USER_VERBS_EXP_CMD_DESTROY_DCT]	= ib_uverbs_exp_destroy_dct,
164 	[IB_USER_VERBS_EXP_CMD_QUERY_DCT]	= ib_uverbs_exp_query_dct,
165 };
166 
167 static void ib_uverbs_add_one(struct ib_device *device);
168 static void ib_uverbs_remove_one(struct ib_device *device);
169 
ib_uverbs_release_dev(struct kref * ref)170 static void ib_uverbs_release_dev(struct kref *ref)
171 {
172 	struct ib_uverbs_device *dev =
173 		container_of(ref, struct ib_uverbs_device, ref);
174 
175 	complete(&dev->comp);
176 }
177 
ib_uverbs_release_event_file(struct kref * ref)178 static void ib_uverbs_release_event_file(struct kref *ref)
179 {
180 	struct ib_uverbs_event_file *file =
181 		container_of(ref, struct ib_uverbs_event_file, ref);
182 
183 	kfree(file);
184 }
185 
ib_uverbs_release_ucq(struct ib_uverbs_file * file,struct ib_uverbs_event_file * ev_file,struct ib_ucq_object * uobj)186 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
187 			  struct ib_uverbs_event_file *ev_file,
188 			  struct ib_ucq_object *uobj)
189 {
190 	struct ib_uverbs_event *evt, *tmp;
191 
192 	if (ev_file) {
193 		spin_lock_irq(&ev_file->lock);
194 		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
195 			list_del(&evt->list);
196 			kfree(evt);
197 		}
198 		spin_unlock_irq(&ev_file->lock);
199 
200 		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
201 	}
202 
203 	spin_lock_irq(&file->async_file->lock);
204 	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
205 		list_del(&evt->list);
206 		kfree(evt);
207 	}
208 	spin_unlock_irq(&file->async_file->lock);
209 }
210 
ib_uverbs_release_uevent(struct ib_uverbs_file * file,struct ib_uevent_object * uobj)211 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
212 			      struct ib_uevent_object *uobj)
213 {
214 	struct ib_uverbs_event *evt, *tmp;
215 
216 	spin_lock_irq(&file->async_file->lock);
217 	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
218 		list_del(&evt->list);
219 		kfree(evt);
220 	}
221 	spin_unlock_irq(&file->async_file->lock);
222 }
223 
ib_uverbs_detach_umcast(struct ib_qp * qp,struct ib_uqp_object * uobj)224 static void ib_uverbs_detach_umcast(struct ib_qp *qp,
225 				    struct ib_uqp_object *uobj)
226 {
227 	struct ib_uverbs_mcast_entry *mcast, *tmp;
228 
229 	list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
230 		ib_detach_mcast(qp, &mcast->gid, mcast->lid);
231 		list_del(&mcast->list);
232 		kfree(mcast);
233 	}
234 }
235 
ib_uverbs_cleanup_ucontext(struct ib_uverbs_file * file,struct ib_ucontext * context)236 static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
237 				      struct ib_ucontext *context)
238 {
239 	struct ib_uobject *uobj, *tmp;
240 	int err;
241 
242 	if (!context)
243 		return 0;
244 
245 	context->closing = 1;
246 
247 	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
248 		struct ib_ah *ah = uobj->object;
249 
250 		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
251 		ib_destroy_ah(ah);
252 		kfree(uobj);
253 	}
254 
255 	/* Remove MWs before QPs, in order to support type 2A MWs. */
256 	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
257 		struct ib_mw *mw = uobj->object;
258 
259 		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
260 		err = ib_dealloc_mw(mw);
261 		if (err) {
262 			pr_info("user_verbs: couldn't deallocate MW during cleanup.\n");
263 			pr_info("user_verbs: the system may have become unstable.\n");
264 		}
265 		kfree(uobj);
266 	}
267 	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
268 		struct ib_flow *flow_id = uobj->object;
269 
270 		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
271 		ib_destroy_flow(flow_id);
272 		kfree(uobj);
273 	}
274 
275 	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
276 		struct ib_qp *qp = uobj->object;
277 		struct ib_uqp_object *uqp =
278 			container_of(uobj, struct ib_uqp_object, uevent.uobject);
279 
280 		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
281 
282 		ib_uverbs_detach_umcast(qp, uqp);
283 		err = ib_destroy_qp(qp);
284 		if (err)
285 			pr_info("destroying uverbs qp failed: err %d\n", err);
286 
287 		ib_uverbs_release_uevent(file, &uqp->uevent);
288 		kfree(uqp);
289 	}
290 
291 	list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) {
292 		struct ib_dct *dct = uobj->object;
293 		struct ib_udct_object *udct =
294 			container_of(uobj, struct ib_udct_object, uobject);
295 
296 		idr_remove_uobj(&ib_uverbs_dct_idr, uobj);
297 
298 		err = ib_destroy_dct(dct);
299 		if (err)
300 			pr_info("destroying uverbs dct failed: err %d\n", err);
301 
302 		kfree(udct);
303 	}
304 
305 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
306 		struct ib_srq *srq = uobj->object;
307 		struct ib_uevent_object *uevent =
308 			container_of(uobj, struct ib_uevent_object, uobject);
309 
310 		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
311 		err = ib_destroy_srq(srq);
312 		if (err)
313 			pr_info("destroying uverbs srq failed: err %d\n", err);
314 		ib_uverbs_release_uevent(file, uevent);
315 		kfree(uevent);
316 	}
317 
318 	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
319 		struct ib_cq *cq = uobj->object;
320 		struct ib_uverbs_event_file *ev_file = cq->cq_context;
321 		struct ib_ucq_object *ucq =
322 			container_of(uobj, struct ib_ucq_object, uobject);
323 
324 		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
325 		err = ib_destroy_cq(cq);
326 		if (err)
327 			pr_info("destroying uverbs cq failed: err %d\n", err);
328 
329 		ib_uverbs_release_ucq(file, ev_file, ucq);
330 		kfree(ucq);
331 	}
332 
333 	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
334 		struct ib_mr *mr = uobj->object;
335 
336 		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
337 		err = ib_dereg_mr(mr);
338 		if (err) {
339 			pr_info("user_verbs: couldn't deregister an MR during cleanup.\n");
340 			pr_info("user_verbs: the system may have become unstable.\n");
341 		}
342 		kfree(uobj);
343 	}
344 
345 	mutex_lock(&file->device->xrcd_tree_mutex);
346 	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
347 		struct ib_xrcd *xrcd = uobj->object;
348 		struct ib_uxrcd_object *uxrcd =
349 			container_of(uobj, struct ib_uxrcd_object, uobject);
350 
351 		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
352 		ib_uverbs_dealloc_xrcd(file->device, xrcd);
353 		kfree(uxrcd);
354 	}
355 	mutex_unlock(&file->device->xrcd_tree_mutex);
356 
357 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
358 		struct ib_pd *pd = uobj->object;
359 
360 		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
361 		ib_dealloc_pd(pd);
362 		kfree(uobj);
363 	}
364 
365 	return context->device->dealloc_ucontext(context);
366 }
367 
ib_uverbs_release_file(struct kref * ref)368 static void ib_uverbs_release_file(struct kref *ref)
369 {
370 	struct ib_uverbs_file *file =
371 		container_of(ref, struct ib_uverbs_file, ref);
372 
373 	module_put(file->device->ib_dev->owner);
374 	kref_put(&file->device->ref, ib_uverbs_release_dev);
375 
376 	kfree(file);
377 }
378 
ib_uverbs_event_read(struct file * filp,char __user * buf,size_t count,loff_t * pos)379 static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
380 				    size_t count, loff_t *pos)
381 {
382 	struct ib_uverbs_event_file *file = filp->private_data;
383 	struct ib_uverbs_event *event;
384 	int eventsz;
385 	int ret = 0;
386 
387 	spin_lock_irq(&file->lock);
388 
389 	while (list_empty(&file->event_list)) {
390 		spin_unlock_irq(&file->lock);
391 
392 		if (filp->f_flags & O_NONBLOCK)
393 			return -EAGAIN;
394 
395 		if (wait_event_interruptible(file->poll_wait,
396 					     !list_empty(&file->event_list)))
397 			return -ERESTARTSYS;
398 
399 		spin_lock_irq(&file->lock);
400 	}
401 
402 	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
403 
404 	if (file->is_async)
405 		eventsz = sizeof (struct ib_uverbs_async_event_desc);
406 	else
407 		eventsz = sizeof (struct ib_uverbs_comp_event_desc);
408 
409 	if (eventsz > count) {
410 		ret   = -EINVAL;
411 		event = NULL;
412 	} else {
413 		list_del(file->event_list.next);
414 		if (event->counter) {
415 			++(*event->counter);
416 			list_del(&event->obj_list);
417 		}
418 	}
419 
420 	spin_unlock_irq(&file->lock);
421 
422 	if (event) {
423 		if (copy_to_user(buf, event, eventsz))
424 			ret = -EFAULT;
425 		else
426 			ret = eventsz;
427 	}
428 
429 	kfree(event);
430 
431 	return ret;
432 }
433 
ib_uverbs_event_poll(struct file * filp,struct poll_table_struct * wait)434 static unsigned int ib_uverbs_event_poll(struct file *filp,
435 					 struct poll_table_struct *wait)
436 {
437 	unsigned int pollflags = 0;
438 	struct ib_uverbs_event_file *file = filp->private_data;
439 
440 	file->filp = filp;
441 	poll_wait(filp, &file->poll_wait, wait);
442 
443 	spin_lock_irq(&file->lock);
444 	if (!list_empty(&file->event_list))
445 		pollflags = POLLIN | POLLRDNORM;
446 	spin_unlock_irq(&file->lock);
447 
448 	return pollflags;
449 }
450 
ib_uverbs_event_fasync(int fd,struct file * filp,int on)451 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
452 {
453 	struct ib_uverbs_event_file *file = filp->private_data;
454 
455 	return fasync_helper(fd, filp, on, &file->async_queue);
456 }
457 
ib_uverbs_event_close(struct inode * inode,struct file * filp)458 static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
459 {
460 	struct ib_uverbs_event_file *file = filp->private_data;
461 	struct ib_uverbs_event *entry, *tmp;
462 
463 	spin_lock_irq(&file->lock);
464 	file->is_closed = 1;
465 	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
466 		if (entry->counter)
467 			list_del(&entry->obj_list);
468 		kfree(entry);
469 	}
470 	spin_unlock_irq(&file->lock);
471 
472 	if (file->is_async) {
473 		ib_unregister_event_handler(&file->uverbs_file->event_handler);
474 		kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
475 	}
476 	kref_put(&file->ref, ib_uverbs_release_event_file);
477 
478 	return 0;
479 }
480 
481 static const struct file_operations uverbs_event_fops = {
482 	.owner	 = THIS_MODULE,
483 	.read 	 = ib_uverbs_event_read,
484 	.poll    = ib_uverbs_event_poll,
485 	.release = ib_uverbs_event_close,
486 	.fasync  = ib_uverbs_event_fasync,
487 	.llseek	 = no_llseek,
488 };
489 
ib_uverbs_comp_handler(struct ib_cq * cq,void * cq_context)490 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
491 {
492 	struct ib_uverbs_event_file    *file = cq_context;
493 	struct ib_ucq_object	       *uobj;
494 	struct ib_uverbs_event	       *entry;
495 	unsigned long			flags;
496 
497 	if (!file)
498 		return;
499 
500 	spin_lock_irqsave(&file->lock, flags);
501 	if (file->is_closed) {
502 		spin_unlock_irqrestore(&file->lock, flags);
503 		return;
504 	}
505 
506 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
507 	if (!entry) {
508 		spin_unlock_irqrestore(&file->lock, flags);
509 		return;
510 	}
511 
512 	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
513 
514 	entry->desc.comp.cq_handle = cq->uobject->user_handle;
515 	entry->counter		   = &uobj->comp_events_reported;
516 
517 	list_add_tail(&entry->list, &file->event_list);
518 	list_add_tail(&entry->obj_list, &uobj->comp_list);
519 	spin_unlock_irqrestore(&file->lock, flags);
520 
521 	wake_up_interruptible(&file->poll_wait);
522 	if (file->filp)
523 		selwakeup(&file->filp->f_selinfo);
524 	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
525 }
526 
/*
 * Queue an asynchronous event on @file's async event file.
 *
 * @element:  value reported in the descriptor's element field
 * @event:    event type reported in the descriptor
 * @obj_list: per-object list the event is also linked on, or NULL
 * @counter:  per-object "events reported" counter stored in the event
 *            (incremented by the read path), or NULL
 *
 * The event is silently dropped if the async file has already been
 * closed or the atomic allocation fails.  After queueing, readers,
 * pollers and SIGIO listeners are woken up.
 */
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long flags;

	spin_lock_irqsave(&file->async_file->lock, flags);
	if (file->async_file->is_closed) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	/*
	 * Allocate zeroed memory: the read path copies the whole async
	 * descriptor to user space, so any descriptor bytes not assigned
	 * below must not carry stale kernel heap contents.
	 */
	entry = kzalloc(sizeof *entry, GFP_ATOMIC);
	if (!entry) {
		spin_unlock_irqrestore(&file->async_file->lock, flags);
		return;
	}

	entry->desc.async.element    = element;
	entry->desc.async.event_type = event;
	entry->counter               = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, flags);

	wake_up_interruptible(&file->async_file->poll_wait);
	if (file->async_file->filp)
		selwakeup(&file->async_file->filp->f_selinfo);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
}
561 
ib_uverbs_cq_event_handler(struct ib_event * event,void * context_ptr)562 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
563 {
564 	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
565 						  struct ib_ucq_object, uobject);
566 
567 	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
568 				event->event, &uobj->async_list,
569 				&uobj->async_events_reported);
570 }
571 
ib_uverbs_qp_event_handler(struct ib_event * event,void * context_ptr)572 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
573 {
574 	struct ib_uevent_object *uobj;
575 
576 	uobj = container_of(event->element.qp->uobject,
577 			    struct ib_uevent_object, uobject);
578 
579 	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
580 				event->event, &uobj->event_list,
581 				&uobj->events_reported);
582 }
583 
ib_uverbs_srq_event_handler(struct ib_event * event,void * context_ptr)584 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
585 {
586 	struct ib_uevent_object *uobj;
587 
588 	uobj = container_of(event->element.srq->uobject,
589 			    struct ib_uevent_object, uobject);
590 
591 	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
592 				event->event, &uobj->event_list,
593 				&uobj->events_reported);
594 }
595 
ib_uverbs_event_handler(struct ib_event_handler * handler,struct ib_event * event)596 void ib_uverbs_event_handler(struct ib_event_handler *handler,
597 			     struct ib_event *event)
598 {
599 	struct ib_uverbs_file *file =
600 		container_of(handler, struct ib_uverbs_file, event_handler);
601 
602 	ib_uverbs_async_handler(file, event->element.port_num, event->event,
603 				NULL, NULL);
604 }
605 
ib_uverbs_alloc_event_file(struct ib_uverbs_file * uverbs_file,int is_async)606 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
607 					int is_async)
608 {
609 	struct ib_uverbs_event_file *ev_file;
610 	struct file *filp;
611 
612 	ev_file = kzalloc(sizeof *ev_file, GFP_KERNEL);
613 	if (!ev_file)
614 		return ERR_PTR(-ENOMEM);
615 
616 	kref_init(&ev_file->ref);
617 	spin_lock_init(&ev_file->lock);
618 	INIT_LIST_HEAD(&ev_file->event_list);
619 	init_waitqueue_head(&ev_file->poll_wait);
620 	ev_file->uverbs_file = uverbs_file;
621 	ev_file->is_async    = is_async;
622 
623 	/*
624 	 * fops_get() can't fail here, because we're coming from a
625 	 * system call on a uverbs file, which will already have a
626 	 * module reference.
627 	 */
628 	filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
629 
630 	if (IS_ERR(filp)) {
631 		kfree(ev_file);
632 	} else {
633 	filp->private_data = ev_file;
634 	}
635 
636 	return filp;
637 }
638 
639 /*
640  * Look up a completion event file by FD.  If lookup is successful,
641  * takes a ref to the event file struct that it returns; if
642  * unsuccessful, returns NULL.
643  */
ib_uverbs_lookup_comp_file(int fd)644 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
645 {
646 	struct ib_uverbs_event_file *ev_file = NULL;
647 	struct fd f = fdget(fd);
648 
649 	if (!f.file)
650 		return NULL;
651 
652 	if (f.file->f_op != &uverbs_event_fops)
653 		goto out;
654 
655 	ev_file = f.file->private_data;
656 	if (ev_file->is_async) {
657 		ev_file = NULL;
658 		goto out;
659 	}
660 
661 	kref_get(&ev_file->ref);
662 
663 out:
664 	fdput(f);
665 	return ev_file;
666 }
667 
/*
 * Map a regular verbs command number to a printable name.
 *
 * Covers every command that has an entry in uverbs_cmd_table, including
 * ALLOC_MW and DEALLOC_MW.  Returns "Unknown command" for any other
 * value.  The returned string is a constant and must not be freed.
 */
static const char *verbs_cmd_str(__u32 cmd)
{
	switch (cmd) {
	case IB_USER_VERBS_CMD_GET_CONTEXT:
		return "GET_CONTEXT";
	case IB_USER_VERBS_CMD_QUERY_DEVICE:
		return "QUERY_DEVICE";
	case IB_USER_VERBS_CMD_QUERY_PORT:
		return "QUERY_PORT";
	case IB_USER_VERBS_CMD_ALLOC_PD:
		return "ALLOC_PD";
	case IB_USER_VERBS_CMD_DEALLOC_PD:
		return "DEALLOC_PD";
	case IB_USER_VERBS_CMD_REG_MR:
		return "REG_MR";
	case IB_USER_VERBS_CMD_DEREG_MR:
		return "DEREG_MR";
	case IB_USER_VERBS_CMD_ALLOC_MW:
		return "ALLOC_MW";
	case IB_USER_VERBS_CMD_DEALLOC_MW:
		return "DEALLOC_MW";
	case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL:
		return "CREATE_COMP_CHANNEL";
	case IB_USER_VERBS_CMD_CREATE_CQ:
		return "CREATE_CQ";
	case IB_USER_VERBS_CMD_RESIZE_CQ:
		return "RESIZE_CQ";
	case IB_USER_VERBS_CMD_POLL_CQ:
		return "POLL_CQ";
	case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ:
		return "REQ_NOTIFY_CQ";
	case IB_USER_VERBS_CMD_DESTROY_CQ:
		return "DESTROY_CQ";
	case IB_USER_VERBS_CMD_CREATE_QP:
		return "CREATE_QP";
	case IB_USER_VERBS_CMD_QUERY_QP:
		return "QUERY_QP";
	case IB_USER_VERBS_CMD_MODIFY_QP:
		return "MODIFY_QP";
	case IB_USER_VERBS_CMD_DESTROY_QP:
		return "DESTROY_QP";
	case IB_USER_VERBS_CMD_POST_SEND:
		return "POST_SEND";
	case IB_USER_VERBS_CMD_POST_RECV:
		return "POST_RECV";
	case IB_USER_VERBS_CMD_POST_SRQ_RECV:
		return "POST_SRQ_RECV";
	case IB_USER_VERBS_CMD_CREATE_AH:
		return "CREATE_AH";
	case IB_USER_VERBS_CMD_DESTROY_AH:
		return "DESTROY_AH";
	case IB_USER_VERBS_CMD_ATTACH_MCAST:
		return "ATTACH_MCAST";
	case IB_USER_VERBS_CMD_DETACH_MCAST:
		return "DETACH_MCAST";
	case IB_USER_VERBS_CMD_CREATE_SRQ:
		return "CREATE_SRQ";
	case IB_USER_VERBS_CMD_MODIFY_SRQ:
		return "MODIFY_SRQ";
	case IB_USER_VERBS_CMD_QUERY_SRQ:
		return "QUERY_SRQ";
	case IB_USER_VERBS_CMD_DESTROY_SRQ:
		return "DESTROY_SRQ";
	case IB_USER_VERBS_CMD_OPEN_XRCD:
		return "OPEN_XRCD";
	case IB_USER_VERBS_CMD_CLOSE_XRCD:
		return "CLOSE_XRCD";
	case IB_USER_VERBS_CMD_CREATE_XSRQ:
		return "CREATE_XSRQ";
	case IB_USER_VERBS_CMD_OPEN_QP:
		return "OPEN_QP";
	}

	return "Unknown command";
}
739 
/*
 * Bit in dev->cmd_perf that enables logging of a command's execution
 * time in the write handler; the bits below it are compared against
 * the command value to pick the command being measured.
 */
enum {
	COMMAND_INFO_MASK	= 0x1000,
};
743 
ib_uverbs_exp_handle_cmd(struct ib_uverbs_file * file,const char __user * buf,struct ib_device * dev,struct ib_uverbs_cmd_hdr * hdr,size_t count,int legacy_ex_cmd)744 static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file,
745 					const char __user *buf,
746 					struct ib_device *dev,
747 					struct ib_uverbs_cmd_hdr *hdr,
748 					size_t count,
749 					int legacy_ex_cmd)
750 {
751 	struct ib_udata ucore;
752 	struct ib_udata uhw;
753 	struct ib_uverbs_ex_cmd_hdr ex_hdr;
754 	__u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST;
755 
756 	if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
757 				    IB_USER_VERBS_CMD_COMMAND_MASK))
758 		return -EINVAL;
759 
760 	if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) ||
761 	    !uverbs_exp_cmd_table[command])
762 		return -EINVAL;
763 
764 	if (!file->ucontext)
765 		return -EINVAL;
766 
767 	if (!(dev->uverbs_exp_cmd_mask & (1ull << command)))
768 		return -ENOSYS;
769 
770 	if (legacy_ex_cmd) {
771 		struct ib_uverbs_ex_cmd_hdr_legacy hxl;
772 		struct ib_uverbs_ex_cmd_resp1_legacy resp1;
773 		__u64 response;
774 		ssize_t ret;
775 
776 		if (count < sizeof(hxl))
777 			return -EINVAL;
778 
779 		if (copy_from_user(&hxl, buf, sizeof(hxl)))
780 			return -EFAULT;
781 
782 		if (((hxl.in_words + hxl.provider_in_words) * 4) != count)
783 			return -EINVAL;
784 
785 		count -= sizeof(hxl);
786 		buf += sizeof(hxl);
787 		if (hxl.out_words || hxl.provider_out_words) {
788 			if (count < sizeof(resp1))
789 				return -EINVAL;
790 			if (copy_from_user(&resp1, buf, sizeof(resp1)))
791 				return -EFAULT;
792 			response = resp1.response;
793 			if (!response)
794 				return -EINVAL;
795 
796 			/*
797 			 * Change user buffer to comply with new extension format.
798 			 */
799 			if (sizeof(resp1.comp_mask) != sizeof(resp1.response))
800 				return -EFAULT;
801 			buf += sizeof(resp1.comp_mask);
802 			if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask,
803 					 sizeof(resp1.response)))
804 				return -EFAULT;
805 
806 		} else {
807 			response = 0;
808 		}
809 
810 		INIT_UDATA_EX(&ucore,
811 			      (hxl.in_words) ? buf : 0,
812 			      response,
813 			      hxl.in_words * 4,
814 			      hxl.out_words * 4);
815 
816 		INIT_UDATA_EX(&uhw,
817 			      (hxl.provider_in_words) ? buf + ucore.inlen : 0,
818 			      (hxl.provider_out_words) ? response + ucore.outlen : 0,
819 			      hxl.provider_in_words * 4,
820 			      hxl.provider_out_words * 4);
821 
822 		ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw);
823 		/*
824 		 * UnChange user buffer
825 		 */
826 		if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response)))
827 			return -EFAULT;
828 
829 		return ret;
830 	} else {
831 		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
832 			return -EINVAL;
833 
834 		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
835 			return -EFAULT;
836 
837 		buf += sizeof(hdr) + sizeof(ex_hdr);
838 
839 		if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count)
840 			return -EINVAL;
841 
842 		if (ex_hdr.response) {
843 			if (!hdr->out_words && !ex_hdr.provider_out_words)
844 				return -EINVAL;
845 		} else {
846 			if (hdr->out_words || ex_hdr.provider_out_words)
847 				return -EINVAL;
848 		}
849 
850 		INIT_UDATA_EX(&ucore,
851 			      (hdr->in_words) ? buf : 0,
852 			      (unsigned long)ex_hdr.response,
853 			      hdr->in_words * 8,
854 			      hdr->out_words * 8);
855 
856 		INIT_UDATA_EX(&uhw,
857 			      (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
858 			      (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
859 			      ex_hdr.provider_in_words * 8,
860 			      ex_hdr.provider_out_words * 8);
861 
862 		return uverbs_exp_cmd_table[command](file, &ucore, &uhw);
863 	}
864 }
865 
ib_uverbs_write(struct file * filp,const char __user * buf,size_t count,loff_t * pos)866 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
867 			     size_t count, loff_t *pos)
868 {
869 	struct ib_uverbs_file *file = filp->private_data;
870 	struct ib_device *dev = file->device->ib_dev;
871 	struct ib_uverbs_cmd_hdr hdr;
872 	struct timespec ts1;
873 	struct timespec ts2;
874 	ktime_t t1, t2, delta;
875 	s64 ds;
876 	ssize_t ret;
877 	u64 dividend;
878 	u32 divisor;
879 	__u32 flags;
880 	__u32 command;
881 	int legacy_ex_cmd = 0;
882 	size_t written_count = count;
883 
884 	if (count < sizeof hdr)
885 		return -EINVAL;
886 
887 	if (copy_from_user(&hdr, buf, sizeof hdr))
888 		return -EFAULT;
889 
890 	/*
891 	 * For BWD compatibility change old style extension verbs commands
892 	 * to their equivalent experimental command.
893 	 */
894 	if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) &&
895 	    (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) {
896 		hdr.command += IB_USER_VERBS_EXP_CMD_FIRST -
897 			       IB_USER_VERBS_LEGACY_CMD_FIRST;
898 		legacy_ex_cmd = 1;
899 	}
900 
901 	flags = (hdr.command &
902 		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
903 	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
904 
905 	ktime_get_ts(&ts1);
906 	if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) {
907 		ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd);
908 	} else if (!flags) {
909 		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
910 		    !uverbs_cmd_table[command])
911 			return -EINVAL;
912 
913 		if (!file->ucontext &&
914 		    command != IB_USER_VERBS_CMD_GET_CONTEXT)
915 			return -EINVAL;
916 
917 		if (!(dev->uverbs_cmd_mask & (1ull << command)))
918 			return -ENOSYS;
919 
920 	if (hdr.in_words * 4 != count)
921 		return -EINVAL;
922 
923 		ret = uverbs_cmd_table[command](file,
924 						buf + sizeof(hdr),
925 						hdr.in_words * 4,
926 						hdr.out_words * 4);
927 	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
928 		struct ib_udata ucore;
929 		struct ib_udata uhw;
930 		struct ib_uverbs_ex_cmd_hdr ex_hdr;
931 
932 		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
933 					   IB_USER_VERBS_CMD_COMMAND_MASK))
934 		return -EINVAL;
935 
936 		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
937 		    !uverbs_ex_cmd_table[command])
938 			return -EINVAL;
939 
940 		if (!file->ucontext)
941 			return -EINVAL;
942 
943 		if (!(dev->uverbs_ex_cmd_mask & (1ull << command)))
944 			return -ENOSYS;
945 
946 		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
947 			return -EINVAL;
948 
949 		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
950 			return -EFAULT;
951 
952 		count -= sizeof(hdr) + sizeof(ex_hdr);
953 		buf += sizeof(hdr) + sizeof(ex_hdr);
954 
955 		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
956 			return -EINVAL;
957 
958 		if (ex_hdr.response) {
959 			if (!hdr.out_words && !ex_hdr.provider_out_words)
960 				return -EINVAL;
961 		} else {
962 			if (hdr.out_words || ex_hdr.provider_out_words)
963 		return -EINVAL;
964 		}
965 
966 		INIT_UDATA_EX(&ucore,
967 			      (hdr.in_words) ? buf : 0,
968 			      (unsigned long)ex_hdr.response,
969 			      hdr.in_words * 8,
970 			      hdr.out_words * 8);
971 
972 		INIT_UDATA_EX(&uhw,
973 			      (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
974 			      (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
975 			      ex_hdr.provider_in_words * 8,
976 			      ex_hdr.provider_out_words * 8);
977 
978 		ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
979 
980 		if (ret)
981 			return ret;
982 
983 		return written_count;
984 
985 	} else {
986 		return -EFAULT;
987 	}
988 
989 	if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) {
990 		ktime_get_ts(&ts2);
991 		t1 = timespec_to_ktime(ts1);
992 		t2 = timespec_to_ktime(ts2);
993 		delta = ktime_sub(t2, t1);
994 		ds = ktime_to_ns(delta);
995 		spin_lock(&dev->cmd_perf_lock);
996 		dividend = dev->cmd_avg * dev->cmd_n + ds;
997 		++dev->cmd_n;
998 		divisor = dev->cmd_n;
999 		do_div(dividend, divisor);
1000 		dev->cmd_avg = dividend;
1001 		spin_unlock(&dev->cmd_perf_lock);
1002 		if (dev->cmd_perf & COMMAND_INFO_MASK) {
1003 			pr_info("%s: %s execution time = %lld nsec\n",
1004 				file->device->ib_dev->name,
1005 				verbs_cmd_str(hdr.command),
1006 				(long long)ds);
1007 		}
1008 	}
1009 	return ret;
1010 }
1011 
ib_uverbs_mmap(struct file * filp,struct vm_area_struct * vma)1012 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
1013 {
1014 	struct ib_uverbs_file *file = filp->private_data;
1015 
1016 	if (!file->ucontext)
1017 		return -ENODEV;
1018 	else
1019 		return file->device->ib_dev->mmap(file->ucontext, vma);
1020 }
1021 /* XXX Not supported in FreeBSD */
1022 #if 0
1023 static unsigned long ib_uverbs_get_unmapped_area(struct file *filp,
1024 		unsigned long addr,
1025 		unsigned long len, unsigned long pgoff, unsigned long flags)
1026 {
1027 	struct ib_uverbs_file *file = filp->private_data;
1028 
1029 	if (!file->ucontext)
1030 		return -ENODEV;
1031 	else {
1032 		if (!file->device->ib_dev->get_unmapped_area)
1033 			return current->mm->get_unmapped_area(filp, addr, len,
1034 								pgoff, flags);
1035 
1036 		return file->device->ib_dev->get_unmapped_area(filp, addr, len,
1037 								pgoff, flags);
1038 	}
1039 }
1040 #endif
1041 
ib_uverbs_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)1042 static long ib_uverbs_ioctl(struct file *filp,
1043 			   unsigned int cmd, unsigned long arg)
1044 {
1045 	struct ib_uverbs_file *file = filp->private_data;
1046 
1047 	if (!file->device->ib_dev->ioctl)
1048 		return -ENOTSUPP;
1049 
1050 	if (!file->ucontext)
1051 		return -ENODEV;
1052 	else
1053 		/* provider should provide it's own locking mechanism */
1054 		return file->device->ib_dev->ioctl(file->ucontext, cmd, arg);
1055 }
1056 
1057 /*
1058  * ib_uverbs_open() does not need the BKL:
1059  *
1060  *  - the ib_uverbs_device structures are properly reference counted and
1061  *    everything else is purely local to the file being created, so
1062  *    races against other open calls are not a problem;
1063  *  - there is no ioctl method to race against;
1064  *  - the open method will either immediately run -ENXIO, or all
1065  *    required initialization will be done.
1066  */
ib_uverbs_open(struct inode * inode,struct file * filp)1067 static int ib_uverbs_open(struct inode *inode, struct file *filp)
1068 {
1069 	struct ib_uverbs_device *dev;
1070 	struct ib_uverbs_file *file;
1071 	int ret;
1072 
1073 	dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev);
1074 	if (dev)
1075 		kref_get(&dev->ref);
1076 	else
1077 		return -ENXIO;
1078 
1079 	if (!try_module_get(dev->ib_dev->owner)) {
1080 		ret = -ENODEV;
1081 		goto err;
1082 	}
1083 
1084 	file = kmalloc(sizeof *file, GFP_KERNEL);
1085 	if (!file) {
1086 		ret = -ENOMEM;
1087 		goto err_module;
1088 	}
1089 
1090 	file->device	 = dev;
1091 	file->ucontext	 = NULL;
1092 	file->async_file = NULL;
1093 	kref_init(&file->ref);
1094 	mutex_init(&file->mutex);
1095 
1096 	filp->private_data = file;
1097 
1098 	return nonseekable_open(inode, filp);
1099 
1100 err_module:
1101 	module_put(dev->ib_dev->owner);
1102 
1103 err:
1104 	kref_put(&dev->ref, ib_uverbs_release_dev);
1105 	return ret;
1106 }
1107 
ib_uverbs_close(struct inode * inode,struct file * filp)1108 static int ib_uverbs_close(struct inode *inode, struct file *filp)
1109 {
1110 	struct ib_uverbs_file *file = filp->private_data;
1111 
1112 	ib_uverbs_cleanup_ucontext(file, file->ucontext);
1113 
1114 	if (file->async_file)
1115 		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
1116 
1117 	kref_put(&file->ref, ib_uverbs_release_file);
1118 
1119 	return 0;
1120 }
1121 
1122 static const struct file_operations uverbs_fops = {
1123 	.owner 	 = THIS_MODULE,
1124 	.write 	 = ib_uverbs_write,
1125 	.open 	 = ib_uverbs_open,
1126 	.release = ib_uverbs_close,
1127 	.llseek	 = no_llseek,
1128 	.unlocked_ioctl = ib_uverbs_ioctl,
1129 };
1130 
1131 static const struct file_operations uverbs_mmap_fops = {
1132 	.owner 	 = THIS_MODULE,
1133 	.write 	 = ib_uverbs_write,
1134 	.mmap    = ib_uverbs_mmap,
1135 	.open 	 = ib_uverbs_open,
1136 	.release = ib_uverbs_close,
1137 	.llseek	 = no_llseek,
1138 /* XXX Not supported in FreeBSD */
1139 #if 0
1140 	.get_unmapped_area = ib_uverbs_get_unmapped_area,
1141 #endif
1142 	.unlocked_ioctl = ib_uverbs_ioctl,
1143 };
1144 
1145 static struct ib_client uverbs_client = {
1146 	.name   = "uverbs",
1147 	.add    = ib_uverbs_add_one,
1148 	.remove = ib_uverbs_remove_one
1149 };
1150 
show_ibdev(struct device * device,struct device_attribute * attr,char * buf)1151 static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
1152 			  char *buf)
1153 {
1154 	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1155 
1156 	if (!dev)
1157 		return -ENODEV;
1158 
1159 	return sprintf(buf, "%s\n", dev->ib_dev->name);
1160 }
1161 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1162 
show_dev_ref_cnt(struct device * device,struct device_attribute * attr,char * buf)1163 static ssize_t show_dev_ref_cnt(struct device *device,
1164 				struct device_attribute *attr, char *buf)
1165 {
1166 	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1167 
1168 	if (!dev)
1169 		return -ENODEV;
1170 
1171 	return sprintf(buf, "%d\n",  atomic_read(&dev->ref.refcount));
1172 }
1173 static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL);
1174 
show_dev_abi_version(struct device * device,struct device_attribute * attr,char * buf)1175 static ssize_t show_dev_abi_version(struct device *device,
1176 				    struct device_attribute *attr, char *buf)
1177 {
1178 	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1179 
1180 	if (!dev)
1181 		return -ENODEV;
1182 
1183 	return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
1184 }
1185 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1186 
show_abi_version(struct class * class,struct class_attribute * attr,char * buf)1187 static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
1188 {
1189 	return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
1190 }
1191 
1192 static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1193 
1194 static dev_t overflow_maj;
1195 static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
1196 
1197 /*
1198  * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
1199  * requesting a new major number and doubling the number of max devices we
1200  * support. It's stupid, but simple.
1201  */
find_overflow_devnum(void)1202 static int find_overflow_devnum(void)
1203 {
1204 	int ret;
1205 
1206 	if (!overflow_maj) {
1207 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
1208 					  "infiniband_verbs");
1209 		if (ret) {
1210 			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
1211 			return ret;
1212 		}
1213 	}
1214 
1215 	ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
1216 	if (ret >= IB_UVERBS_MAX_DEVICES)
1217 		return -1;
1218 
1219 	return ret;
1220 }
1221 #include <linux/pci.h>
1222 
1223 static ssize_t
show_dev_device(struct device * device,struct device_attribute * attr,char * buf)1224 show_dev_device(struct device *device, struct device_attribute *attr, char *buf)
1225 {
1226 	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1227 
1228 	if (!dev)
1229 		return -ENODEV;
1230 
1231 	return sprintf(buf, "0x%04x\n",
1232 	    ((struct pci_dev *)dev->ib_dev->dma_device)->device);
1233 }
1234 static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL);
1235 
1236 static ssize_t
show_dev_vendor(struct device * device,struct device_attribute * attr,char * buf)1237 show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf)
1238 {
1239 	struct ib_uverbs_device *dev = dev_get_drvdata(device);
1240 
1241 	if (!dev)
1242 		return -ENODEV;
1243 
1244 	return sprintf(buf, "0x%04x\n",
1245 	    ((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
1246 }
1247 
1248 static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
1249 
/*
 * NULL-terminated list of the attributes exposed through device_group:
 * the "device" and "vendor" attributes defined above.
 */
struct attribute *device_attrs[] =
{
	&dev_attr_device.attr,
	&dev_attr_vendor.attr,
	NULL
};
1256 
1257 static struct attribute_group device_group = {
1258         .name  = "device",
1259         .attrs  = device_attrs
1260 };
1261 
ib_uverbs_add_one(struct ib_device * device)1262 static void ib_uverbs_add_one(struct ib_device *device)
1263 {
1264 	int devnum;
1265 	dev_t base;
1266 	struct ib_uverbs_device *uverbs_dev;
1267 
1268 	if (!device->alloc_ucontext)
1269 		return;
1270 
1271 	uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
1272 	if (!uverbs_dev)
1273 		return;
1274 
1275 	kref_init(&uverbs_dev->ref);
1276 	init_completion(&uverbs_dev->comp);
1277 	uverbs_dev->xrcd_tree = RB_ROOT;
1278 	mutex_init(&uverbs_dev->xrcd_tree_mutex);
1279 
1280 	spin_lock(&map_lock);
1281 	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
1282 	if (devnum >= IB_UVERBS_MAX_DEVICES) {
1283 		spin_unlock(&map_lock);
1284 		devnum = find_overflow_devnum();
1285 		if (devnum < 0)
1286 		goto err;
1287 
1288 		spin_lock(&map_lock);
1289 		uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
1290 		base = devnum + overflow_maj;
1291 		set_bit(devnum, overflow_map);
1292 	} else {
1293 		uverbs_dev->devnum = devnum;
1294 		base = devnum + IB_UVERBS_BASE_DEV;
1295 		set_bit(devnum, dev_map);
1296 	}
1297 	spin_unlock(&map_lock);
1298 
1299 	uverbs_dev->ib_dev           = device;
1300 	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1301 
1302 	cdev_init(&uverbs_dev->cdev, NULL);
1303 	uverbs_dev->cdev.owner = THIS_MODULE;
1304 	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
1305 	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
1306 	if (cdev_add(&uverbs_dev->cdev, base, 1))
1307 		goto err_cdev;
1308 
1309 	uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
1310 					uverbs_dev->cdev.dev, uverbs_dev,
1311 					"uverbs%d", uverbs_dev->devnum);
1312 	if (IS_ERR(uverbs_dev->dev))
1313 		goto err_cdev;
1314 
1315 	if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
1316 		goto err_class;
1317 	if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt))
1318 		goto err_class;
1319 	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1320 		goto err_class;
1321 	if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
1322 		goto err_class;
1323 
1324 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
1325 
1326 	return;
1327 
1328 err_class:
1329 	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1330 
1331 err_cdev:
1332 	cdev_del(&uverbs_dev->cdev);
1333 	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1334 		clear_bit(devnum, dev_map);
1335 	else
1336 		clear_bit(devnum, overflow_map);
1337 
1338 err:
1339 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
1340 	wait_for_completion(&uverbs_dev->comp);
1341 	kfree(uverbs_dev);
1342 	return;
1343 }
1344 
ib_uverbs_remove_one(struct ib_device * device)1345 static void ib_uverbs_remove_one(struct ib_device *device)
1346 {
1347 	struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
1348 
1349 	if (!uverbs_dev)
1350 		return;
1351 
1352 	sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group);
1353 	dev_set_drvdata(uverbs_dev->dev, NULL);
1354 	device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1355 	cdev_del(&uverbs_dev->cdev);
1356 
1357 	if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1358 	clear_bit(uverbs_dev->devnum, dev_map);
1359 	else
1360 		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
1361 
1362 	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
1363 	wait_for_completion(&uverbs_dev->comp);
1364 	kfree(uverbs_dev);
1365 }
1366 
/*
 * Class devnode callback: sets the node mode to 0666 when the caller
 * supplies a mode pointer, and returns a kasprintf()-allocated path of
 * the form "infiniband/<device name>".
 */
static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode != NULL)
		*mode = 0666;

	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
1373 
ib_uverbs_init(void)1374 static int __init ib_uverbs_init(void)
1375 {
1376 	int ret;
1377 
1378 	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
1379 				     "infiniband_verbs");
1380 	if (ret) {
1381 		printk(KERN_ERR "user_verbs: couldn't register device number\n");
1382 		goto out;
1383 	}
1384 
1385 	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1386 	if (IS_ERR(uverbs_class)) {
1387 		ret = PTR_ERR(uverbs_class);
1388 		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
1389 		goto out_chrdev;
1390 	}
1391 
1392 	uverbs_class->devnode = uverbs_devnode;
1393 
1394 	ret = class_create_file(uverbs_class, &class_attr_abi_version);
1395 	if (ret) {
1396 		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
1397 		goto out_class;
1398 	}
1399 
1400 	ret = ib_register_client(&uverbs_client);
1401 	if (ret) {
1402 		printk(KERN_ERR "user_verbs: couldn't register client\n");
1403 		goto out_class;
1404 	}
1405 
1406 	return 0;
1407 
1408 out_class:
1409 	class_destroy(uverbs_class);
1410 
1411 out_chrdev:
1412 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1413 
1414 out:
1415 	return ret;
1416 }
1417 
ib_uverbs_cleanup(void)1418 static void __exit ib_uverbs_cleanup(void)
1419 {
1420 	ib_unregister_client(&uverbs_client);
1421 	class_destroy(uverbs_class);
1422 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1423 	if (overflow_maj)
1424 		unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
1425 	idr_destroy(&ib_uverbs_pd_idr);
1426 	idr_destroy(&ib_uverbs_mr_idr);
1427 	idr_destroy(&ib_uverbs_mw_idr);
1428 	idr_destroy(&ib_uverbs_ah_idr);
1429 	idr_destroy(&ib_uverbs_cq_idr);
1430 	idr_destroy(&ib_uverbs_qp_idr);
1431 	idr_destroy(&ib_uverbs_srq_idr);
1432 }
1433 
/* Register ib_uverbs_init/ib_uverbs_cleanup as the module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);
1436