1 /*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
6 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37 #include <linux/module.h>
38 #include <linux/device.h>
39 #include <linux/err.h>
40 #include <linux/fs.h>
41 #include <linux/poll.h>
42 #include <linux/sched.h>
43 #include <linux/file.h>
44 #include <linux/cdev.h>
45 #include <linux/slab.h>
46 #include <linux/ktime.h>
47 #include <linux/rbtree.h>
48 #include <linux/math64.h>
49
50 #include <asm/uaccess.h>
51
52 #include "uverbs.h"
53
54 MODULE_AUTHOR("Roland Dreier");
55 MODULE_DESCRIPTION("InfiniBand userspace verbs access");
56 MODULE_LICENSE("Dual BSD/GPL");
57
/*
 * Device numbering constants for the uverbs nodes: the major number, the
 * first minor number, and the number of bits in the dev_map bitmap.
 */
enum {
	IB_UVERBS_MAJOR = 231,
	IB_UVERBS_BASE_MINOR = 192,
	IB_UVERBS_MAX_DEVICES = 32
};

/* Device number built from the major and the first minor above. */
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
65
/*
 * copy_from callback for extended commands: fetch input bytes from the user
 * buffer recorded in udata into dest.  The transfer size is the smaller of
 * len and udata->inlen.
 *
 * Returns 0 on success, -EFAULT if copy_from_user() reports uncopied bytes.
 */
static int uverbs_copy_from_udata_ex(void *dest, struct ib_udata *udata, size_t len)
{
	if (copy_from_user(dest, udata->inbuf, min(udata->inlen, len)))
		return -EFAULT;

	return 0;
}
70
/*
 * copy_to callback for extended commands: store bytes from src into the user
 * output buffer recorded in udata.  The transfer size is the smaller of len
 * and udata->outlen.
 *
 * Returns 0 on success, -EFAULT if copy_to_user() reports uncopied bytes.
 */
static int uverbs_copy_to_udata_ex(struct ib_udata *udata, void *src, size_t len)
{
	if (copy_to_user(udata->outbuf, src, min(udata->outlen, len)))
		return -EFAULT;

	return 0;
}
75
/*
 * Copy callbacks that INIT_UDATA_EX() installs in an ib_udata; both clamp
 * the transfer to the length stored in that ib_udata.
 */
static struct ib_udata_ops uverbs_copy_ex = {
	.copy_from = uverbs_copy_from_udata_ex,
	.copy_to = uverbs_copy_to_udata_ex
};
80
/*
 * Fill in an ib_udata for an extended/experimental command.
 *
 *   udata - structure to initialize
 *   ibuf  - user input address (integer or pointer; cast via unsigned long)
 *   obuf  - user output address (integer or pointer; cast via unsigned long)
 *   ilen  - input length stored in inlen
 *   olen  - output length stored in outlen
 *
 * The ops pointer is set to uverbs_copy_ex.
 */
#define INIT_UDATA_EX(udata, ibuf, obuf, ilen, olen) \
	do { \
		(udata)->ops = &uverbs_copy_ex; \
		(udata)->inbuf = (void __user *)(unsigned long)(ibuf); \
		(udata)->outbuf = (void __user *)(unsigned long)(obuf); \
		(udata)->inlen = (ilen); \
		(udata)->outlen = (olen); \
	} while (0)
89
90
/* Device class for uverbs; it is set up outside the code visible here. */
static struct class *uverbs_class;

/*
 * One IDR per userspace-visible object type.  These are non-static, so other
 * uverbs source files can reference them.
 * NOTE(review): ib_uverbs_idr_lock presumably serializes access to these
 * IDRs -- confirm against idr_remove_uobj() and its siblings.
 */
DEFINE_SPINLOCK(ib_uverbs_idr_lock);
DEFINE_IDR(ib_uverbs_pd_idr);
DEFINE_IDR(ib_uverbs_mr_idr);
DEFINE_IDR(ib_uverbs_mw_idr);
DEFINE_IDR(ib_uverbs_ah_idr);
DEFINE_IDR(ib_uverbs_cq_idr);
DEFINE_IDR(ib_uverbs_qp_idr);
DEFINE_IDR(ib_uverbs_srq_idr);
DEFINE_IDR(ib_uverbs_xrcd_idr);
DEFINE_IDR(ib_uverbs_rule_idr);
DEFINE_IDR(ib_uverbs_dct_idr);

/*
 * Bitmap with IB_UVERBS_MAX_DEVICES bits and its companion lock.
 * NOTE(review): presumably tracks which device minors are in use; the
 * users are outside the code visible here -- confirm.
 */
static DEFINE_SPINLOCK(map_lock);
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
107
/*
 * Dispatch table for the standard command set, indexed by command number.
 * ib_uverbs_write() calls the selected handler with the user buffer advanced
 * past the command header and with the input/output lengths in bytes.
 * Indices without an initializer are NULL and are rejected by the caller.
 */
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
				     const char __user *buf, int in_len,
				     int out_len) = {
	[IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
	[IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
	[IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
	[IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
	[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
	[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
	[IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
	[IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
};
146
/*
 * Dispatch table for extended commands (those written with the
 * IB_USER_VERBS_CMD_FLAG_EXTENDED flag), indexed by command number.
 * Handlers receive separate ib_udata descriptors for the core and the
 * provider-specific parts of the request.
 */
static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
				    struct ib_udata *ucore,
				    struct ib_udata *uhw) = {
	[IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
};
153
/*
 * Dispatch table for experimental commands.  ib_uverbs_exp_handle_cmd()
 * indexes it with (command - IB_USER_VERBS_EXP_CMD_FIRST).
 */
static ssize_t (*uverbs_exp_cmd_table[])(struct ib_uverbs_file *file,
					 struct ib_udata *ucore,
					 struct ib_udata *uhw) = {
	[IB_USER_VERBS_EXP_CMD_CREATE_QP] = ib_uverbs_exp_create_qp,
	[IB_USER_VERBS_EXP_CMD_MODIFY_CQ] = ib_uverbs_exp_modify_cq,
	[IB_USER_VERBS_EXP_CMD_MODIFY_QP] = ib_uverbs_exp_modify_qp,
	[IB_USER_VERBS_EXP_CMD_CREATE_CQ] = ib_uverbs_exp_create_cq,
	[IB_USER_VERBS_EXP_CMD_QUERY_DEVICE] = ib_uverbs_exp_query_device,
	[IB_USER_VERBS_EXP_CMD_CREATE_DCT] = ib_uverbs_exp_create_dct,
	[IB_USER_VERBS_EXP_CMD_DESTROY_DCT] = ib_uverbs_exp_destroy_dct,
	[IB_USER_VERBS_EXP_CMD_QUERY_DCT] = ib_uverbs_exp_query_dct,
};
166
/* Forward declarations; both functions are defined later in this file. */
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
169
ib_uverbs_release_dev(struct kref * ref)170 static void ib_uverbs_release_dev(struct kref *ref)
171 {
172 struct ib_uverbs_device *dev =
173 container_of(ref, struct ib_uverbs_device, ref);
174
175 complete(&dev->comp);
176 }
177
ib_uverbs_release_event_file(struct kref * ref)178 static void ib_uverbs_release_event_file(struct kref *ref)
179 {
180 struct ib_uverbs_event_file *file =
181 container_of(ref, struct ib_uverbs_event_file, ref);
182
183 kfree(file);
184 }
185
ib_uverbs_release_ucq(struct ib_uverbs_file * file,struct ib_uverbs_event_file * ev_file,struct ib_ucq_object * uobj)186 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
187 struct ib_uverbs_event_file *ev_file,
188 struct ib_ucq_object *uobj)
189 {
190 struct ib_uverbs_event *evt, *tmp;
191
192 if (ev_file) {
193 spin_lock_irq(&ev_file->lock);
194 list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
195 list_del(&evt->list);
196 kfree(evt);
197 }
198 spin_unlock_irq(&ev_file->lock);
199
200 kref_put(&ev_file->ref, ib_uverbs_release_event_file);
201 }
202
203 spin_lock_irq(&file->async_file->lock);
204 list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
205 list_del(&evt->list);
206 kfree(evt);
207 }
208 spin_unlock_irq(&file->async_file->lock);
209 }
210
ib_uverbs_release_uevent(struct ib_uverbs_file * file,struct ib_uevent_object * uobj)211 void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
212 struct ib_uevent_object *uobj)
213 {
214 struct ib_uverbs_event *evt, *tmp;
215
216 spin_lock_irq(&file->async_file->lock);
217 list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
218 list_del(&evt->list);
219 kfree(evt);
220 }
221 spin_unlock_irq(&file->async_file->lock);
222 }
223
ib_uverbs_detach_umcast(struct ib_qp * qp,struct ib_uqp_object * uobj)224 static void ib_uverbs_detach_umcast(struct ib_qp *qp,
225 struct ib_uqp_object *uobj)
226 {
227 struct ib_uverbs_mcast_entry *mcast, *tmp;
228
229 list_for_each_entry_safe(mcast, tmp, &uobj->mcast_list, list) {
230 ib_detach_mcast(qp, &mcast->gid, mcast->lid);
231 list_del(&mcast->list);
232 kfree(mcast);
233 }
234 }
235
/*
 * Destroy every object still owned by a user context, then release the
 * context itself through the device's dealloc_ucontext method.
 *
 * Objects are torn down by type in this order: AH, MW, flow rule, QP, DCT,
 * SRQ, CQ, MR, XRCD, PD.  For each object the handle is removed from its
 * IDR, the verbs object is destroyed, and the uobject wrapper is freed.
 * Destroy failures are logged with pr_info() and do not stop the cleanup.
 *
 * Returns 0 when context is NULL, otherwise the dealloc_ucontext() result.
 */
static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
				      struct ib_ucontext *context)
{
	struct ib_uobject *uobj, *tmp;
	int err;

	if (!context)
		return 0;

	/* Flag the context as being torn down before touching its objects. */
	context->closing = 1;

	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
		struct ib_ah *ah = uobj->object;

		idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
		ib_destroy_ah(ah);
		kfree(uobj);
	}

	/* Remove MWs before QPs, in order to support type 2A MWs. */
	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) {
		struct ib_mw *mw = uobj->object;

		idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
		err = ib_dealloc_mw(mw);
		if (err) {
			pr_info("user_verbs: couldn't deallocate MW during cleanup.\n");
			pr_info("user_verbs: the system may have become unstable.\n");
		}
		kfree(uobj);
	}
	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) {
		struct ib_flow *flow_id = uobj->object;

		idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
		ib_destroy_flow(flow_id);
		kfree(uobj);
	}

	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
		struct ib_qp *qp = uobj->object;
		struct ib_uqp_object *uqp =
			container_of(uobj, struct ib_uqp_object, uevent.uobject);

		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);

		/* Drop multicast attachments before destroying the QP. */
		ib_uverbs_detach_umcast(qp, uqp);
		err = ib_destroy_qp(qp);
		if (err)
			pr_info("destroying uverbs qp failed: err %d\n", err);

		/* Free async events still queued for this QP. */
		ib_uverbs_release_uevent(file, &uqp->uevent);
		kfree(uqp);
	}

	list_for_each_entry_safe(uobj, tmp, &context->dct_list, list) {
		struct ib_dct *dct = uobj->object;
		struct ib_udct_object *udct =
			container_of(uobj, struct ib_udct_object, uobject);

		idr_remove_uobj(&ib_uverbs_dct_idr, uobj);

		err = ib_destroy_dct(dct);
		if (err)
			pr_info("destroying uverbs dct failed: err %d\n", err);

		kfree(udct);
	}

	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
		struct ib_srq *srq = uobj->object;
		struct ib_uevent_object *uevent =
			container_of(uobj, struct ib_uevent_object, uobject);

		idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
		err = ib_destroy_srq(srq);
		if (err)
			pr_info("destroying uverbs srq failed: err %d\n", err);
		ib_uverbs_release_uevent(file, uevent);
		kfree(uevent);
	}

	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
		struct ib_cq *cq = uobj->object;
		/* Read cq_context before the CQ is destroyed below. */
		struct ib_uverbs_event_file *ev_file = cq->cq_context;
		struct ib_ucq_object *ucq =
			container_of(uobj, struct ib_ucq_object, uobject);

		idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
		err = ib_destroy_cq(cq);
		if (err)
			pr_info("destroying uverbs cq failed: err %d\n", err);

		/* Frees queued events; drops a ref on ev_file if non-NULL. */
		ib_uverbs_release_ucq(file, ev_file, ucq);
		kfree(ucq);
	}

	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
		struct ib_mr *mr = uobj->object;

		idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
		err = ib_dereg_mr(mr);
		if (err) {
			pr_info("user_verbs: couldn't deregister an MR during cleanup.\n");
			pr_info("user_verbs: the system may have become unstable.\n");
		}
		kfree(uobj);
	}

	/* XRCD teardown runs with the device's xrcd_tree_mutex held. */
	mutex_lock(&file->device->xrcd_tree_mutex);
	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
		struct ib_xrcd *xrcd = uobj->object;
		struct ib_uxrcd_object *uxrcd =
			container_of(uobj, struct ib_uxrcd_object, uobject);

		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
		ib_uverbs_dealloc_xrcd(file->device, xrcd);
		kfree(uxrcd);
	}
	mutex_unlock(&file->device->xrcd_tree_mutex);

	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
		struct ib_pd *pd = uobj->object;

		idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
		ib_dealloc_pd(pd);
		kfree(uobj);
	}

	return context->device->dealloc_ucontext(context);
}
367
ib_uverbs_release_file(struct kref * ref)368 static void ib_uverbs_release_file(struct kref *ref)
369 {
370 struct ib_uverbs_file *file =
371 container_of(ref, struct ib_uverbs_file, ref);
372
373 module_put(file->device->ib_dev->owner);
374 kref_put(&file->device->ref, ib_uverbs_release_dev);
375
376 kfree(file);
377 }
378
ib_uverbs_event_read(struct file * filp,char __user * buf,size_t count,loff_t * pos)379 static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
380 size_t count, loff_t *pos)
381 {
382 struct ib_uverbs_event_file *file = filp->private_data;
383 struct ib_uverbs_event *event;
384 int eventsz;
385 int ret = 0;
386
387 spin_lock_irq(&file->lock);
388
389 while (list_empty(&file->event_list)) {
390 spin_unlock_irq(&file->lock);
391
392 if (filp->f_flags & O_NONBLOCK)
393 return -EAGAIN;
394
395 if (wait_event_interruptible(file->poll_wait,
396 !list_empty(&file->event_list)))
397 return -ERESTARTSYS;
398
399 spin_lock_irq(&file->lock);
400 }
401
402 event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
403
404 if (file->is_async)
405 eventsz = sizeof (struct ib_uverbs_async_event_desc);
406 else
407 eventsz = sizeof (struct ib_uverbs_comp_event_desc);
408
409 if (eventsz > count) {
410 ret = -EINVAL;
411 event = NULL;
412 } else {
413 list_del(file->event_list.next);
414 if (event->counter) {
415 ++(*event->counter);
416 list_del(&event->obj_list);
417 }
418 }
419
420 spin_unlock_irq(&file->lock);
421
422 if (event) {
423 if (copy_to_user(buf, event, eventsz))
424 ret = -EFAULT;
425 else
426 ret = eventsz;
427 }
428
429 kfree(event);
430
431 return ret;
432 }
433
ib_uverbs_event_poll(struct file * filp,struct poll_table_struct * wait)434 static unsigned int ib_uverbs_event_poll(struct file *filp,
435 struct poll_table_struct *wait)
436 {
437 unsigned int pollflags = 0;
438 struct ib_uverbs_event_file *file = filp->private_data;
439
440 file->filp = filp;
441 poll_wait(filp, &file->poll_wait, wait);
442
443 spin_lock_irq(&file->lock);
444 if (!list_empty(&file->event_list))
445 pollflags = POLLIN | POLLRDNORM;
446 spin_unlock_irq(&file->lock);
447
448 return pollflags;
449 }
450
ib_uverbs_event_fasync(int fd,struct file * filp,int on)451 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
452 {
453 struct ib_uverbs_event_file *file = filp->private_data;
454
455 return fasync_helper(fd, filp, on, &file->async_queue);
456 }
457
ib_uverbs_event_close(struct inode * inode,struct file * filp)458 static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
459 {
460 struct ib_uverbs_event_file *file = filp->private_data;
461 struct ib_uverbs_event *entry, *tmp;
462
463 spin_lock_irq(&file->lock);
464 file->is_closed = 1;
465 list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
466 if (entry->counter)
467 list_del(&entry->obj_list);
468 kfree(entry);
469 }
470 spin_unlock_irq(&file->lock);
471
472 if (file->is_async) {
473 ib_unregister_event_handler(&file->uverbs_file->event_handler);
474 kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
475 }
476 kref_put(&file->ref, ib_uverbs_release_event_file);
477
478 return 0;
479 }
480
/*
 * File operations for event files (read-only, non-seekable).
 * ib_uverbs_lookup_comp_file() also compares a file's f_op against this
 * table to recognize event files.
 */
static const struct file_operations uverbs_event_fops = {
	.owner = THIS_MODULE,
	.read = ib_uverbs_event_read,
	.poll = ib_uverbs_event_poll,
	.release = ib_uverbs_event_close,
	.fasync = ib_uverbs_event_fasync,
	.llseek = no_llseek,
};
489
ib_uverbs_comp_handler(struct ib_cq * cq,void * cq_context)490 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
491 {
492 struct ib_uverbs_event_file *file = cq_context;
493 struct ib_ucq_object *uobj;
494 struct ib_uverbs_event *entry;
495 unsigned long flags;
496
497 if (!file)
498 return;
499
500 spin_lock_irqsave(&file->lock, flags);
501 if (file->is_closed) {
502 spin_unlock_irqrestore(&file->lock, flags);
503 return;
504 }
505
506 entry = kmalloc(sizeof *entry, GFP_ATOMIC);
507 if (!entry) {
508 spin_unlock_irqrestore(&file->lock, flags);
509 return;
510 }
511
512 uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
513
514 entry->desc.comp.cq_handle = cq->uobject->user_handle;
515 entry->counter = &uobj->comp_events_reported;
516
517 list_add_tail(&entry->list, &file->event_list);
518 list_add_tail(&entry->obj_list, &uobj->comp_list);
519 spin_unlock_irqrestore(&file->lock, flags);
520
521 wake_up_interruptible(&file->poll_wait);
522 if (file->filp)
523 selwakeup(&file->filp->f_selinfo);
524 kill_fasync(&file->async_queue, SIGIO, POLL_IN);
525 }
526
/*
 * Queue an asynchronous event on file->async_file and wake any readers.
 *
 *   element  - value stored in the event descriptor's element field
 *   event    - value stored in the event descriptor's event_type field
 *   obj_list - optional per-object list the entry is also linked on
 *   counter  - pointer stored in the entry; may be NULL
 *
 * Nothing is queued when the async file has been closed or when the atomic
 * allocation of the event entry fails.
 */
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
				    __u64 element, __u64 event,
				    struct list_head *obj_list,
				    u32 *counter)
{
	struct ib_uverbs_event *entry;
	unsigned long irqflags;

	spin_lock_irqsave(&file->async_file->lock, irqflags);

	if (file->async_file->is_closed)
		goto out_unlock;

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL)
		goto out_unlock;

	entry->desc.async.element = element;
	entry->desc.async.event_type = event;
	entry->counter = counter;

	list_add_tail(&entry->list, &file->async_file->event_list);
	if (obj_list != NULL)
		list_add_tail(&entry->obj_list, obj_list);
	spin_unlock_irqrestore(&file->async_file->lock, irqflags);

	/* Notify sleepers, select()/poll() waiters and SIGIO subscribers. */
	wake_up_interruptible(&file->async_file->poll_wait);
	if (file->async_file->filp)
		selwakeup(&file->async_file->filp->f_selinfo);
	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
	return;

out_unlock:
	spin_unlock_irqrestore(&file->async_file->lock, irqflags);
}
561
ib_uverbs_cq_event_handler(struct ib_event * event,void * context_ptr)562 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
563 {
564 struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
565 struct ib_ucq_object, uobject);
566
567 ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
568 event->event, &uobj->async_list,
569 &uobj->async_events_reported);
570 }
571
ib_uverbs_qp_event_handler(struct ib_event * event,void * context_ptr)572 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
573 {
574 struct ib_uevent_object *uobj;
575
576 uobj = container_of(event->element.qp->uobject,
577 struct ib_uevent_object, uobject);
578
579 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
580 event->event, &uobj->event_list,
581 &uobj->events_reported);
582 }
583
ib_uverbs_srq_event_handler(struct ib_event * event,void * context_ptr)584 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
585 {
586 struct ib_uevent_object *uobj;
587
588 uobj = container_of(event->element.srq->uobject,
589 struct ib_uevent_object, uobject);
590
591 ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
592 event->event, &uobj->event_list,
593 &uobj->events_reported);
594 }
595
ib_uverbs_event_handler(struct ib_event_handler * handler,struct ib_event * event)596 void ib_uverbs_event_handler(struct ib_event_handler *handler,
597 struct ib_event *event)
598 {
599 struct ib_uverbs_file *file =
600 container_of(handler, struct ib_uverbs_file, event_handler);
601
602 ib_uverbs_async_handler(file, event->element.port_num, event->event,
603 NULL, NULL);
604 }
605
ib_uverbs_alloc_event_file(struct ib_uverbs_file * uverbs_file,int is_async)606 struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
607 int is_async)
608 {
609 struct ib_uverbs_event_file *ev_file;
610 struct file *filp;
611
612 ev_file = kzalloc(sizeof *ev_file, GFP_KERNEL);
613 if (!ev_file)
614 return ERR_PTR(-ENOMEM);
615
616 kref_init(&ev_file->ref);
617 spin_lock_init(&ev_file->lock);
618 INIT_LIST_HEAD(&ev_file->event_list);
619 init_waitqueue_head(&ev_file->poll_wait);
620 ev_file->uverbs_file = uverbs_file;
621 ev_file->is_async = is_async;
622
623 /*
624 * fops_get() can't fail here, because we're coming from a
625 * system call on a uverbs file, which will already have a
626 * module reference.
627 */
628 filp = alloc_file(FMODE_READ, fops_get(&uverbs_event_fops));
629
630 if (IS_ERR(filp)) {
631 kfree(ev_file);
632 } else {
633 filp->private_data = ev_file;
634 }
635
636 return filp;
637 }
638
639 /*
640 * Look up a completion event file by FD. If lookup is successful,
641 * takes a ref to the event file struct that it returns; if
642 * unsuccessful, returns NULL.
643 */
ib_uverbs_lookup_comp_file(int fd)644 struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
645 {
646 struct ib_uverbs_event_file *ev_file = NULL;
647 struct fd f = fdget(fd);
648
649 if (!f.file)
650 return NULL;
651
652 if (f.file->f_op != &uverbs_event_fops)
653 goto out;
654
655 ev_file = f.file->private_data;
656 if (ev_file->is_async) {
657 ev_file = NULL;
658 goto out;
659 }
660
661 kref_get(&ev_file->ref);
662
663 out:
664 fdput(f);
665 return ev_file;
666 }
667
/*
 * Map a standard verbs command number to a printable name for log output.
 *
 * Every command dispatched through uverbs_cmd_table has a name here,
 * including ALLOC_MW and DEALLOC_MW.  Any other value yields
 * "Unknown command".  The returned string is a constant and must not be
 * freed.
 */
static const char *verbs_cmd_str(__u32 cmd)
{
	switch (cmd) {
	case IB_USER_VERBS_CMD_GET_CONTEXT:
		return "GET_CONTEXT";
	case IB_USER_VERBS_CMD_QUERY_DEVICE:
		return "QUERY_DEVICE";
	case IB_USER_VERBS_CMD_QUERY_PORT:
		return "QUERY_PORT";
	case IB_USER_VERBS_CMD_ALLOC_PD:
		return "ALLOC_PD";
	case IB_USER_VERBS_CMD_DEALLOC_PD:
		return "DEALLOC_PD";
	case IB_USER_VERBS_CMD_REG_MR:
		return "REG_MR";
	case IB_USER_VERBS_CMD_DEREG_MR:
		return "DEREG_MR";
	case IB_USER_VERBS_CMD_ALLOC_MW:
		return "ALLOC_MW";
	case IB_USER_VERBS_CMD_DEALLOC_MW:
		return "DEALLOC_MW";
	case IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL:
		return "CREATE_COMP_CHANNEL";
	case IB_USER_VERBS_CMD_CREATE_CQ:
		return "CREATE_CQ";
	case IB_USER_VERBS_CMD_RESIZE_CQ:
		return "RESIZE_CQ";
	case IB_USER_VERBS_CMD_POLL_CQ:
		return "POLL_CQ";
	case IB_USER_VERBS_CMD_REQ_NOTIFY_CQ:
		return "REQ_NOTIFY_CQ";
	case IB_USER_VERBS_CMD_DESTROY_CQ:
		return "DESTROY_CQ";
	case IB_USER_VERBS_CMD_CREATE_QP:
		return "CREATE_QP";
	case IB_USER_VERBS_CMD_QUERY_QP:
		return "QUERY_QP";
	case IB_USER_VERBS_CMD_MODIFY_QP:
		return "MODIFY_QP";
	case IB_USER_VERBS_CMD_DESTROY_QP:
		return "DESTROY_QP";
	case IB_USER_VERBS_CMD_POST_SEND:
		return "POST_SEND";
	case IB_USER_VERBS_CMD_POST_RECV:
		return "POST_RECV";
	case IB_USER_VERBS_CMD_POST_SRQ_RECV:
		return "POST_SRQ_RECV";
	case IB_USER_VERBS_CMD_CREATE_AH:
		return "CREATE_AH";
	case IB_USER_VERBS_CMD_DESTROY_AH:
		return "DESTROY_AH";
	case IB_USER_VERBS_CMD_ATTACH_MCAST:
		return "ATTACH_MCAST";
	case IB_USER_VERBS_CMD_DETACH_MCAST:
		return "DETACH_MCAST";
	case IB_USER_VERBS_CMD_CREATE_SRQ:
		return "CREATE_SRQ";
	case IB_USER_VERBS_CMD_MODIFY_SRQ:
		return "MODIFY_SRQ";
	case IB_USER_VERBS_CMD_QUERY_SRQ:
		return "QUERY_SRQ";
	case IB_USER_VERBS_CMD_DESTROY_SRQ:
		return "DESTROY_SRQ";
	case IB_USER_VERBS_CMD_OPEN_XRCD:
		return "OPEN_XRCD";
	case IB_USER_VERBS_CMD_CLOSE_XRCD:
		return "CLOSE_XRCD";
	case IB_USER_VERBS_CMD_CREATE_XSRQ:
		return "CREATE_XSRQ";
	case IB_USER_VERBS_CMD_OPEN_QP:
		return "OPEN_QP";
	}

	return "Unknown command";
}
739
/*
 * Layout of dev->cmd_perf as used by ib_uverbs_write(): the bits below
 * COMMAND_INFO_MASK select the command to time, and the COMMAND_INFO_MASK
 * bit itself enables a pr_info() line for each timed command.
 */
enum {
	COMMAND_INFO_MASK = 0x1000,
};
743
ib_uverbs_exp_handle_cmd(struct ib_uverbs_file * file,const char __user * buf,struct ib_device * dev,struct ib_uverbs_cmd_hdr * hdr,size_t count,int legacy_ex_cmd)744 static ssize_t ib_uverbs_exp_handle_cmd(struct ib_uverbs_file *file,
745 const char __user *buf,
746 struct ib_device *dev,
747 struct ib_uverbs_cmd_hdr *hdr,
748 size_t count,
749 int legacy_ex_cmd)
750 {
751 struct ib_udata ucore;
752 struct ib_udata uhw;
753 struct ib_uverbs_ex_cmd_hdr ex_hdr;
754 __u32 command = hdr->command - IB_USER_VERBS_EXP_CMD_FIRST;
755
756 if (hdr->command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
757 IB_USER_VERBS_CMD_COMMAND_MASK))
758 return -EINVAL;
759
760 if (command >= ARRAY_SIZE(uverbs_exp_cmd_table) ||
761 !uverbs_exp_cmd_table[command])
762 return -EINVAL;
763
764 if (!file->ucontext)
765 return -EINVAL;
766
767 if (!(dev->uverbs_exp_cmd_mask & (1ull << command)))
768 return -ENOSYS;
769
770 if (legacy_ex_cmd) {
771 struct ib_uverbs_ex_cmd_hdr_legacy hxl;
772 struct ib_uverbs_ex_cmd_resp1_legacy resp1;
773 __u64 response;
774 ssize_t ret;
775
776 if (count < sizeof(hxl))
777 return -EINVAL;
778
779 if (copy_from_user(&hxl, buf, sizeof(hxl)))
780 return -EFAULT;
781
782 if (((hxl.in_words + hxl.provider_in_words) * 4) != count)
783 return -EINVAL;
784
785 count -= sizeof(hxl);
786 buf += sizeof(hxl);
787 if (hxl.out_words || hxl.provider_out_words) {
788 if (count < sizeof(resp1))
789 return -EINVAL;
790 if (copy_from_user(&resp1, buf, sizeof(resp1)))
791 return -EFAULT;
792 response = resp1.response;
793 if (!response)
794 return -EINVAL;
795
796 /*
797 * Change user buffer to comply with new extension format.
798 */
799 if (sizeof(resp1.comp_mask) != sizeof(resp1.response))
800 return -EFAULT;
801 buf += sizeof(resp1.comp_mask);
802 if (copy_to_user(__DECONST(void __user *, buf), &resp1.comp_mask,
803 sizeof(resp1.response)))
804 return -EFAULT;
805
806 } else {
807 response = 0;
808 }
809
810 INIT_UDATA_EX(&ucore,
811 (hxl.in_words) ? buf : 0,
812 response,
813 hxl.in_words * 4,
814 hxl.out_words * 4);
815
816 INIT_UDATA_EX(&uhw,
817 (hxl.provider_in_words) ? buf + ucore.inlen : 0,
818 (hxl.provider_out_words) ? response + ucore.outlen : 0,
819 hxl.provider_in_words * 4,
820 hxl.provider_out_words * 4);
821
822 ret = uverbs_exp_cmd_table[command](file, &ucore, &uhw);
823 /*
824 * UnChange user buffer
825 */
826 if (response && copy_to_user(__DECONST(void __user *, buf), &resp1.response, sizeof(resp1.response)))
827 return -EFAULT;
828
829 return ret;
830 } else {
831 if (count < (sizeof(hdr) + sizeof(ex_hdr)))
832 return -EINVAL;
833
834 if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
835 return -EFAULT;
836
837 buf += sizeof(hdr) + sizeof(ex_hdr);
838
839 if ((hdr->in_words + ex_hdr.provider_in_words) * 8 != count)
840 return -EINVAL;
841
842 if (ex_hdr.response) {
843 if (!hdr->out_words && !ex_hdr.provider_out_words)
844 return -EINVAL;
845 } else {
846 if (hdr->out_words || ex_hdr.provider_out_words)
847 return -EINVAL;
848 }
849
850 INIT_UDATA_EX(&ucore,
851 (hdr->in_words) ? buf : 0,
852 (unsigned long)ex_hdr.response,
853 hdr->in_words * 8,
854 hdr->out_words * 8);
855
856 INIT_UDATA_EX(&uhw,
857 (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
858 (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
859 ex_hdr.provider_in_words * 8,
860 ex_hdr.provider_out_words * 8);
861
862 return uverbs_exp_cmd_table[command](file, &ucore, &uhw);
863 }
864 }
865
/*
 * write() method of the uverbs device: parse one command from the user
 * buffer and dispatch it.
 *
 * There are three dispatch paths, selected by the flag bits and number in
 * the command word:
 *   - no flags, command >= IB_USER_VERBS_EXP_CMD_FIRST: experimental
 *     command, handled by ib_uverbs_exp_handle_cmd();
 *   - no flags otherwise: standard command via uverbs_cmd_table;
 *   - IB_USER_VERBS_CMD_FLAG_EXTENDED: extended command via
 *     uverbs_ex_cmd_table; on success it returns the original write size.
 * Any other flag value returns -EFAULT.
 *
 * After the experimental and standard paths, the execution time is folded
 * into the device's running average when the command matches the one
 * selected in dev->cmd_perf.  The extended path and all validation failures
 * return before that accounting.
 */
static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
			       size_t count, loff_t *pos)
{
	struct ib_uverbs_file *file = filp->private_data;
	struct ib_device *dev = file->device->ib_dev;
	struct ib_uverbs_cmd_hdr hdr;
	struct timespec ts1;
	struct timespec ts2;
	ktime_t t1, t2, delta;
	s64 ds;
	ssize_t ret;
	u64 dividend;
	u32 divisor;
	__u32 flags;
	__u32 command;
	int legacy_ex_cmd = 0;
	/* count is adjusted on the extended path; keep the original. */
	size_t written_count = count;

	if (count < sizeof hdr)
		return -EINVAL;

	if (copy_from_user(&hdr, buf, sizeof hdr))
		return -EFAULT;

	/*
	 * For BWD compatibility change old style extension verbs commands
	 * to their equivalent experimental command.
	 */
	if ((hdr.command >= IB_USER_VERBS_LEGACY_CMD_FIRST) &&
	    (hdr.command <= IB_USER_VERBS_LEGACY_EX_CMD_LAST)) {
		hdr.command += IB_USER_VERBS_EXP_CMD_FIRST -
			       IB_USER_VERBS_LEGACY_CMD_FIRST;
		legacy_ex_cmd = 1;
	}

	/* Split the command word into its flag bits and command number. */
	flags = (hdr.command &
		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;

	/* Start timestamp for the optional per-command timing below. */
	ktime_get_ts(&ts1);
	if (!flags && (command >= IB_USER_VERBS_EXP_CMD_FIRST)) {
		ret = ib_uverbs_exp_handle_cmd(file, buf, dev, &hdr, count, legacy_ex_cmd);
	} else if (!flags) {
		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
		    !uverbs_cmd_table[command])
			return -EINVAL;

		/* Only GET_CONTEXT may run before a ucontext exists. */
		if (!file->ucontext &&
		    command != IB_USER_VERBS_CMD_GET_CONTEXT)
			return -EINVAL;

		if (!(dev->uverbs_cmd_mask & (1ull << command)))
			return -ENOSYS;

		/* in_words counts 4-byte words and must match the write size. */
		if (hdr.in_words * 4 != count)
			return -EINVAL;

		ret = uverbs_cmd_table[command](file,
						buf + sizeof(hdr),
						hdr.in_words * 4,
						hdr.out_words * 4);
	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
		struct ib_udata ucore;
		struct ib_udata uhw;
		struct ib_uverbs_ex_cmd_hdr ex_hdr;

		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
					   IB_USER_VERBS_CMD_COMMAND_MASK))
			return -EINVAL;

		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
		    !uverbs_ex_cmd_table[command])
			return -EINVAL;

		if (!file->ucontext)
			return -EINVAL;

		if (!(dev->uverbs_ex_cmd_mask & (1ull << command)))
			return -ENOSYS;

		if (count < (sizeof(hdr) + sizeof(ex_hdr)))
			return -EINVAL;

		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
			return -EFAULT;

		/* Skip both headers; the lengths below cover the payload only. */
		count -= sizeof(hdr) + sizeof(ex_hdr);
		buf += sizeof(hdr) + sizeof(ex_hdr);

		/* Extended lengths are expressed in 8-byte words. */
		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
			return -EINVAL;

		/* A response address is required iff any output is requested. */
		if (ex_hdr.response) {
			if (!hdr.out_words && !ex_hdr.provider_out_words)
				return -EINVAL;
		} else {
			if (hdr.out_words || ex_hdr.provider_out_words)
				return -EINVAL;
		}

		INIT_UDATA_EX(&ucore,
			      (hdr.in_words) ? buf : 0,
			      (unsigned long)ex_hdr.response,
			      hdr.in_words * 8,
			      hdr.out_words * 8);

		/* Provider data follows the core data in both directions. */
		INIT_UDATA_EX(&uhw,
			      (ex_hdr.provider_in_words) ? buf + ucore.inlen : 0,
			      (ex_hdr.provider_out_words) ? ex_hdr.response + ucore.outlen : 0,
			      ex_hdr.provider_in_words * 8,
			      ex_hdr.provider_out_words * 8);

		ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);

		if (ret)
			return ret;

		/* Extended handlers return 0 on success; report bytes written. */
		return written_count;

	} else {
		return -EFAULT;
	}

	/*
	 * Optional timing: when the low bits of cmd_perf select this command,
	 * update the running average execution time under cmd_perf_lock.
	 */
	if ((dev->cmd_perf & (COMMAND_INFO_MASK - 1)) == hdr.command) {
		ktime_get_ts(&ts2);
		t1 = timespec_to_ktime(ts1);
		t2 = timespec_to_ktime(ts2);
		delta = ktime_sub(t2, t1);
		ds = ktime_to_ns(delta);
		spin_lock(&dev->cmd_perf_lock);
		/* new_avg = (old_avg * n + sample) / (n + 1) */
		dividend = dev->cmd_avg * dev->cmd_n + ds;
		++dev->cmd_n;
		divisor = dev->cmd_n;
		do_div(dividend, divisor);
		dev->cmd_avg = dividend;
		spin_unlock(&dev->cmd_perf_lock);
		if (dev->cmd_perf & COMMAND_INFO_MASK) {
			pr_info("%s: %s execution time = %lld nsec\n",
				file->device->ib_dev->name,
				verbs_cmd_str(hdr.command),
				(long long)ds);
		}
	}
	return ret;
}
1011
ib_uverbs_mmap(struct file * filp,struct vm_area_struct * vma)1012 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
1013 {
1014 struct ib_uverbs_file *file = filp->private_data;
1015
1016 if (!file->ucontext)
1017 return -ENODEV;
1018 else
1019 return file->device->ib_dev->mmap(file->ucontext, vma);
1020 }
/* XXX Not supported in FreeBSD */
#if 0
/*
 * get_unmapped_area file operation (currently compiled out).
 *
 * Returns -ENODEV while no user context is attached.  Otherwise the
 * provider's get_unmapped_area method is used when it exists, and the
 * current process's mm->get_unmapped_area is used when it does not.
 */
static unsigned long ib_uverbs_get_unmapped_area(struct file *filp,
				unsigned long addr,
				unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct ib_uverbs_file *ufile = filp->private_data;

	if (ufile->ucontext == NULL)
		return -ENODEV;

	if (ufile->device->ib_dev->get_unmapped_area)
		return ufile->device->ib_dev->get_unmapped_area(filp, addr, len,
								pgoff, flags);

	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
#endif
1041
ib_uverbs_ioctl(struct file * filp,unsigned int cmd,unsigned long arg)1042 static long ib_uverbs_ioctl(struct file *filp,
1043 unsigned int cmd, unsigned long arg)
1044 {
1045 struct ib_uverbs_file *file = filp->private_data;
1046
1047 if (!file->device->ib_dev->ioctl)
1048 return -ENOTSUPP;
1049
1050 if (!file->ucontext)
1051 return -ENODEV;
1052 else
1053 /* provider should provide it's own locking mechanism */
1054 return file->device->ib_dev->ioctl(file->ucontext, cmd, arg);
1055 }
1056
1057 /*
1058 * ib_uverbs_open() does not need the BKL:
1059 *
1060 * - the ib_uverbs_device structures are properly reference counted and
1061 * everything else is purely local to the file being created, so
1062 * races against other open calls are not a problem;
1063 * - there is no ioctl method to race against;
 *   - the open method will either immediately return -ENXIO, or all
1065 * required initialization will be done.
1066 */
ib_uverbs_open(struct inode * inode,struct file * filp)1067 static int ib_uverbs_open(struct inode *inode, struct file *filp)
1068 {
1069 struct ib_uverbs_device *dev;
1070 struct ib_uverbs_file *file;
1071 int ret;
1072
1073 dev = container_of(inode->i_cdev->si_drv1, struct ib_uverbs_device, cdev);
1074 if (dev)
1075 kref_get(&dev->ref);
1076 else
1077 return -ENXIO;
1078
1079 if (!try_module_get(dev->ib_dev->owner)) {
1080 ret = -ENODEV;
1081 goto err;
1082 }
1083
1084 file = kmalloc(sizeof *file, GFP_KERNEL);
1085 if (!file) {
1086 ret = -ENOMEM;
1087 goto err_module;
1088 }
1089
1090 file->device = dev;
1091 file->ucontext = NULL;
1092 file->async_file = NULL;
1093 kref_init(&file->ref);
1094 mutex_init(&file->mutex);
1095
1096 filp->private_data = file;
1097
1098 return nonseekable_open(inode, filp);
1099
1100 err_module:
1101 module_put(dev->ib_dev->owner);
1102
1103 err:
1104 kref_put(&dev->ref, ib_uverbs_release_dev);
1105 return ret;
1106 }
1107
ib_uverbs_close(struct inode * inode,struct file * filp)1108 static int ib_uverbs_close(struct inode *inode, struct file *filp)
1109 {
1110 struct ib_uverbs_file *file = filp->private_data;
1111
1112 ib_uverbs_cleanup_ucontext(file, file->ucontext);
1113
1114 if (file->async_file)
1115 kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
1116
1117 kref_put(&file->ref, ib_uverbs_release_file);
1118
1119 return 0;
1120 }
1121
1122 static const struct file_operations uverbs_fops = {
1123 .owner = THIS_MODULE,
1124 .write = ib_uverbs_write,
1125 .open = ib_uverbs_open,
1126 .release = ib_uverbs_close,
1127 .llseek = no_llseek,
1128 .unlocked_ioctl = ib_uverbs_ioctl,
1129 };
1130
1131 static const struct file_operations uverbs_mmap_fops = {
1132 .owner = THIS_MODULE,
1133 .write = ib_uverbs_write,
1134 .mmap = ib_uverbs_mmap,
1135 .open = ib_uverbs_open,
1136 .release = ib_uverbs_close,
1137 .llseek = no_llseek,
1138 /* XXX Not supported in FreeBSD */
1139 #if 0
1140 .get_unmapped_area = ib_uverbs_get_unmapped_area,
1141 #endif
1142 .unlocked_ioctl = ib_uverbs_ioctl,
1143 };
1144
1145 static struct ib_client uverbs_client = {
1146 .name = "uverbs",
1147 .add = ib_uverbs_add_one,
1148 .remove = ib_uverbs_remove_one
1149 };
1150
show_ibdev(struct device * device,struct device_attribute * attr,char * buf)1151 static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
1152 char *buf)
1153 {
1154 struct ib_uverbs_device *dev = dev_get_drvdata(device);
1155
1156 if (!dev)
1157 return -ENODEV;
1158
1159 return sprintf(buf, "%s\n", dev->ib_dev->name);
1160 }
1161 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
1162
show_dev_ref_cnt(struct device * device,struct device_attribute * attr,char * buf)1163 static ssize_t show_dev_ref_cnt(struct device *device,
1164 struct device_attribute *attr, char *buf)
1165 {
1166 struct ib_uverbs_device *dev = dev_get_drvdata(device);
1167
1168 if (!dev)
1169 return -ENODEV;
1170
1171 return sprintf(buf, "%d\n", atomic_read(&dev->ref.refcount));
1172 }
1173 static DEVICE_ATTR(ref_cnt, S_IRUGO, show_dev_ref_cnt, NULL);
1174
show_dev_abi_version(struct device * device,struct device_attribute * attr,char * buf)1175 static ssize_t show_dev_abi_version(struct device *device,
1176 struct device_attribute *attr, char *buf)
1177 {
1178 struct ib_uverbs_device *dev = dev_get_drvdata(device);
1179
1180 if (!dev)
1181 return -ENODEV;
1182
1183 return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
1184 }
1185 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
1186
show_abi_version(struct class * class,struct class_attribute * attr,char * buf)1187 static ssize_t show_abi_version(struct class *class, struct class_attribute *attr, char *buf)
1188 {
1189 return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION);
1190 }
1191
1192 static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
1193
1194 static dev_t overflow_maj;
1195 static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
1196
1197 /*
1198 * If we have more than IB_UVERBS_MAX_DEVICES, dynamically overflow by
1199 * requesting a new major number and doubling the number of max devices we
1200 * support. It's stupid, but simple.
1201 */
find_overflow_devnum(void)1202 static int find_overflow_devnum(void)
1203 {
1204 int ret;
1205
1206 if (!overflow_maj) {
1207 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
1208 "infiniband_verbs");
1209 if (ret) {
1210 printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
1211 return ret;
1212 }
1213 }
1214
1215 ret = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
1216 if (ret >= IB_UVERBS_MAX_DEVICES)
1217 return -1;
1218
1219 return ret;
1220 }
1221 #include <linux/pci.h>
1222
1223 static ssize_t
show_dev_device(struct device * device,struct device_attribute * attr,char * buf)1224 show_dev_device(struct device *device, struct device_attribute *attr, char *buf)
1225 {
1226 struct ib_uverbs_device *dev = dev_get_drvdata(device);
1227
1228 if (!dev)
1229 return -ENODEV;
1230
1231 return sprintf(buf, "0x%04x\n",
1232 ((struct pci_dev *)dev->ib_dev->dma_device)->device);
1233 }
1234 static DEVICE_ATTR(device, S_IRUGO, show_dev_device, NULL);
1235
1236 static ssize_t
show_dev_vendor(struct device * device,struct device_attribute * attr,char * buf)1237 show_dev_vendor(struct device *device, struct device_attribute *attr, char *buf)
1238 {
1239 struct ib_uverbs_device *dev = dev_get_drvdata(device);
1240
1241 if (!dev)
1242 return -ENODEV;
1243
1244 return sprintf(buf, "0x%04x\n",
1245 ((struct pci_dev *)dev->ib_dev->dma_device)->vendor);
1246 }
1247
1248 static DEVICE_ATTR(vendor, S_IRUGO, show_dev_vendor, NULL);
1249
1250 struct attribute *device_attrs[] =
1251 {
1252 &dev_attr_device.attr,
1253 &dev_attr_vendor.attr,
1254 NULL
1255 };
1256
1257 static struct attribute_group device_group = {
1258 .name = "device",
1259 .attrs = device_attrs
1260 };
1261
ib_uverbs_add_one(struct ib_device * device)1262 static void ib_uverbs_add_one(struct ib_device *device)
1263 {
1264 int devnum;
1265 dev_t base;
1266 struct ib_uverbs_device *uverbs_dev;
1267
1268 if (!device->alloc_ucontext)
1269 return;
1270
1271 uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
1272 if (!uverbs_dev)
1273 return;
1274
1275 kref_init(&uverbs_dev->ref);
1276 init_completion(&uverbs_dev->comp);
1277 uverbs_dev->xrcd_tree = RB_ROOT;
1278 mutex_init(&uverbs_dev->xrcd_tree_mutex);
1279
1280 spin_lock(&map_lock);
1281 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
1282 if (devnum >= IB_UVERBS_MAX_DEVICES) {
1283 spin_unlock(&map_lock);
1284 devnum = find_overflow_devnum();
1285 if (devnum < 0)
1286 goto err;
1287
1288 spin_lock(&map_lock);
1289 uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
1290 base = devnum + overflow_maj;
1291 set_bit(devnum, overflow_map);
1292 } else {
1293 uverbs_dev->devnum = devnum;
1294 base = devnum + IB_UVERBS_BASE_DEV;
1295 set_bit(devnum, dev_map);
1296 }
1297 spin_unlock(&map_lock);
1298
1299 uverbs_dev->ib_dev = device;
1300 uverbs_dev->num_comp_vectors = device->num_comp_vectors;
1301
1302 cdev_init(&uverbs_dev->cdev, NULL);
1303 uverbs_dev->cdev.owner = THIS_MODULE;
1304 uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
1305 kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
1306 if (cdev_add(&uverbs_dev->cdev, base, 1))
1307 goto err_cdev;
1308
1309 uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
1310 uverbs_dev->cdev.dev, uverbs_dev,
1311 "uverbs%d", uverbs_dev->devnum);
1312 if (IS_ERR(uverbs_dev->dev))
1313 goto err_cdev;
1314
1315 if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
1316 goto err_class;
1317 if (device_create_file(uverbs_dev->dev, &dev_attr_ref_cnt))
1318 goto err_class;
1319 if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
1320 goto err_class;
1321 if (sysfs_create_group(&uverbs_dev->dev->kobj, &device_group))
1322 goto err_class;
1323
1324 ib_set_client_data(device, &uverbs_client, uverbs_dev);
1325
1326 return;
1327
1328 err_class:
1329 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1330
1331 err_cdev:
1332 cdev_del(&uverbs_dev->cdev);
1333 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1334 clear_bit(devnum, dev_map);
1335 else
1336 clear_bit(devnum, overflow_map);
1337
1338 err:
1339 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
1340 wait_for_completion(&uverbs_dev->comp);
1341 kfree(uverbs_dev);
1342 return;
1343 }
1344
ib_uverbs_remove_one(struct ib_device * device)1345 static void ib_uverbs_remove_one(struct ib_device *device)
1346 {
1347 struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client);
1348
1349 if (!uverbs_dev)
1350 return;
1351
1352 sysfs_remove_group(&uverbs_dev->dev->kobj, &device_group);
1353 dev_set_drvdata(uverbs_dev->dev, NULL);
1354 device_destroy(uverbs_class, uverbs_dev->cdev.dev);
1355 cdev_del(&uverbs_dev->cdev);
1356
1357 if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
1358 clear_bit(uverbs_dev->devnum, dev_map);
1359 else
1360 clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
1361
1362 kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
1363 wait_for_completion(&uverbs_dev->comp);
1364 kfree(uverbs_dev);
1365 }
1366
/*
 * Class devnode callback: returns a kasprintf()-allocated node name of the
 * form "infiniband/<device name>" and, when mode is non-NULL, sets the node
 * mode to 0666.
 */
static char *uverbs_devnode(struct device *dev, umode_t *mode)
{
	if (mode != NULL)
		*mode = 0666;

	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}
1373
ib_uverbs_init(void)1374 static int __init ib_uverbs_init(void)
1375 {
1376 int ret;
1377
1378 ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
1379 "infiniband_verbs");
1380 if (ret) {
1381 printk(KERN_ERR "user_verbs: couldn't register device number\n");
1382 goto out;
1383 }
1384
1385 uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
1386 if (IS_ERR(uverbs_class)) {
1387 ret = PTR_ERR(uverbs_class);
1388 printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
1389 goto out_chrdev;
1390 }
1391
1392 uverbs_class->devnode = uverbs_devnode;
1393
1394 ret = class_create_file(uverbs_class, &class_attr_abi_version);
1395 if (ret) {
1396 printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
1397 goto out_class;
1398 }
1399
1400 ret = ib_register_client(&uverbs_client);
1401 if (ret) {
1402 printk(KERN_ERR "user_verbs: couldn't register client\n");
1403 goto out_class;
1404 }
1405
1406 return 0;
1407
1408 out_class:
1409 class_destroy(uverbs_class);
1410
1411 out_chrdev:
1412 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1413
1414 out:
1415 return ret;
1416 }
1417
ib_uverbs_cleanup(void)1418 static void __exit ib_uverbs_cleanup(void)
1419 {
1420 ib_unregister_client(&uverbs_client);
1421 class_destroy(uverbs_class);
1422 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
1423 if (overflow_maj)
1424 unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
1425 idr_destroy(&ib_uverbs_pd_idr);
1426 idr_destroy(&ib_uverbs_mr_idr);
1427 idr_destroy(&ib_uverbs_mw_idr);
1428 idr_destroy(&ib_uverbs_ah_idr);
1429 idr_destroy(&ib_uverbs_cq_idr);
1430 idr_destroy(&ib_uverbs_qp_idr);
1431 idr_destroy(&ib_uverbs_srq_idr);
1432 }
1433
/* Module entry and exit points. */
module_init(ib_uverbs_init);
module_exit(ib_uverbs_cleanup);
1436