1 /*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #include <linux/slab.h>
35 #include <linux/module.h>
36 #include <linux/sched.h>
37
38 #include "mlx4_ib.h"
39
convert_access(int acc)40 static u32 convert_access(int acc)
41 {
42 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
43 (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
44 (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
45 (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
46 (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) |
47 MLX4_PERM_LOCAL_READ;
48 }
/* No support for Shared MR feature */
50 #if 0
51 static ssize_t shared_mr_proc_read(struct file *file,
52 char __user *buffer,
53 size_t len,
54 loff_t *offset)
55 {
56
57 return -ENOSYS;
58
59 }
60
61 static ssize_t shared_mr_proc_write(struct file *file,
62 const char __user *buffer,
63 size_t len,
64 loff_t *offset)
65 {
66
67 return -ENOSYS;
68 }
69
70 static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
71 {
72
73 struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
74 struct mlx4_shared_mr_info *smr_info =
75 (struct mlx4_shared_mr_info *)pde->data;
76
77 /* Prevent any mapping not on start of area */
78 if (vma->vm_pgoff != 0)
79 return -EINVAL;
80
81 return ib_umem_map_to_vma(smr_info->umem,
82 vma);
83
84 }
85
86 static const struct file_operations shared_mr_proc_ops = {
87 .owner = THIS_MODULE,
88 .read = shared_mr_proc_read,
89 .write = shared_mr_proc_write,
90 .mmap = shared_mr_mmap
91 };
92
93 static mode_t convert_shared_access(int acc)
94 {
95
96 return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
97 (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
98 (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
99 (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
100 (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
101 (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
102
103 }
104 #endif
mlx4_ib_get_dma_mr(struct ib_pd * pd,int acc)105 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
106 {
107 struct mlx4_ib_mr *mr;
108 int err;
109
110 mr = kzalloc(sizeof *mr, GFP_KERNEL);
111 if (!mr)
112 return ERR_PTR(-ENOMEM);
113
114 err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
115 ~0ull, convert_access(acc), 0, 0, &mr->mmr);
116 if (err)
117 goto err_free;
118
119 err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
120 if (err)
121 goto err_mr;
122
123 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
124 mr->umem = NULL;
125
126 return &mr->ibmr;
127
128 err_mr:
129 (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
130
131 err_free:
132 kfree(mr);
133
134 return ERR_PTR(err);
135 }
136
/*
 * Queue the MTT entries that cover one physically contiguous block.
 *
 * The block [@cur_start_addr, @cur_start_addr + @len) is widened to whole
 * MTT entries of @mtt_size bytes (1 << @mtt_shift) at both ends, and one
 * address per entry is appended to @pages.  Whenever @pages holds a full
 * page worth of u64 entries it is flushed with mlx4_write_mtt(), after
 * which *@start_index is advanced and *@npages is reset to zero.
 *
 * Entries still pending in @pages on return are left for the caller to
 * flush.  Returns 0 on success, -EINVAL if the widened length is not a
 * multiple of @mtt_size, or the error from mlx4_write_mtt().
 */
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	const u64 offset_mask = mtt_size - 1ULL;
	const u64 block_end = cur_start_addr + len;
	u64 entry_addr;
	u64 nr_entries;
	int idx;
	int rc;

	/* Account for the partial MTT entries at the head and the tail. */
	len += cur_start_addr & offset_mask;
	len += round_up(block_end, mtt_size) - block_end;

	if (len & offset_mask) {
		WARN(1 ,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     (unsigned long long)len, (unsigned long long)mtt_size);
		return -EINVAL;
	}

	nr_entries = len >> mtt_shift;

	/*
	 * The block may start in the middle of an MTT record, so the first
	 * entry address is the block start rounded down to mtt_size.
	 */
	entry_addr = round_down(cur_start_addr, mtt_size);

	for (idx = 0; idx < nr_entries; ++idx, entry_addr += mtt_size) {
		pages[*npages] = entry_addr;
		(*npages)++;

		/*
		 * Be friendly to mlx4_write_mtt() and pass it chunks of
		 * appropriate size: flush only once a full page of entries
		 * has been collected.
		 */
		if (*npages != PAGE_SIZE / sizeof(u64))
			continue;

		rc = mlx4_write_mtt(dev->dev, mtt, *start_index,
				    *npages, pages);
		if (rc)
			return rc;

		*start_index += *npages;
		*npages = 0;
	}

	return 0;
}
198
mlx4_ib_umem_write_mtt(struct mlx4_ib_dev * dev,struct mlx4_mtt * mtt,struct ib_umem * umem)199 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
200 struct ib_umem *umem)
201 {
202 u64 *pages;
203 u64 len = 0;
204 int err = 0;
205 u64 mtt_size;
206 u64 cur_start_addr = 0;
207 u64 mtt_shift;
208 int start_index = 0;
209 int npages = 0;
210 struct scatterlist *sg;
211 int i;
212
213 pages = (u64 *) __get_free_page(GFP_KERNEL);
214 if (!pages)
215 return -ENOMEM;
216
217 mtt_shift = mtt->page_shift;
218 mtt_size = 1ULL << mtt_shift;
219
220 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
221 if (cur_start_addr + len ==
222 sg_dma_address(sg)) {
223 /* still the same block */
224 len += sg_dma_len(sg);
225 continue;
226 }
227 /* A new block is started ...*/
228 /* If len is malaligned, write an extra mtt entry to
229 cover the misaligned area (round up the division)
230 */
231 err = mlx4_ib_umem_write_mtt_block(dev,
232 mtt, mtt_size, mtt_shift,
233 len, cur_start_addr,
234 pages,
235 &start_index,
236 &npages);
237 if (err)
238 goto out;
239
240 cur_start_addr =
241 sg_dma_address(sg);
242 len = sg_dma_len(sg);
243 }
244
245 /* Handle the last block */
246 if (len > 0) {
247 /* If len is malaligned, write an extra mtt entry to cover
248 the misaligned area (round up the division)
249 */
250 err = mlx4_ib_umem_write_mtt_block(dev,
251 mtt, mtt_size, mtt_shift,
252 len, cur_start_addr,
253 pages,
254 &start_index,
255 &npages);
256 if (err)
257 goto out;
258 }
259
260
261 if (npages)
262 err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
263
264 out:
265 free_page((unsigned long) pages);
266 return err;
267 }
268
/*
 * Return the bit position of the lowest set bit of @ptr, i.e. the log2 of
 * the largest power of two that divides it.  The caller must pass a
 * non-zero value: for zero the isolated bit is zero and ilog2() is handed 0.
 */
static inline u64 alignment_of(u64 ptr)
{
	u64 lowest_set_bit = ptr & ~(ptr - 1);

	return ilog2(lowest_set_bit);
}
273
/*
 * Shrink @block_shift so that a break between two contiguous blocks is
 * aligned to the resulting MTT size.
 *
 * @next_block_start:  DMA address where the following block begins.
 * @current_block_end: DMA address just past the end of the current block.
 * @block_shift:       MTT shift in effect so far.
 *
 * Returns the (possibly reduced) shift.
 */
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	u64 low_bits_mask = (1ULL << block_shift) - 1ULL;

	if (next_block_start & low_bits_mask) {
		/*
		 * The new block is not as well aligned as the shift
		 * requires: fall back to the alignment given by its lowest
		 * set bit.
		 */
		block_shift = alignment_of(next_block_start);
		low_bits_mask = (1ULL << block_shift) - 1ULL;
	}

	if (current_block_end & low_bits_mask) {
		/*
		 * The end of the previous block is less aligned than the
		 * start of the new one: reduce the shift once more.
		 */
		block_shift = alignment_of(current_block_end);
	}

	return block_shift;
}
304
305 /* Calculate optimal mtt size based on contiguous pages.
306 * Function will return also the number of pages that are not aligned to the
307 calculated mtt_size to be added to total number
308 of pages. For that we should check the first chunk length & last chunk
309 length and if not aligned to mtt_size we should increment
310 the non_aligned_pages number.
311 All chunks in the middle already handled as part of mtt shift calculation
312 for both their start & end addresses.
313 */
mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem * umem,u64 start_va,int * num_of_mtts)314 int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
315 u64 start_va,
316 int *num_of_mtts)
317 {
318 u64 block_shift = MLX4_MAX_MTT_SHIFT;
319 u64 current_block_len = 0;
320 u64 current_block_start = 0;
321 u64 misalignment_bits;
322 u64 first_block_start = 0;
323 u64 last_block_end = 0;
324 u64 total_len = 0;
325 u64 last_block_aligned_end = 0;
326 u64 min_shift = ilog2(umem->page_size);
327 struct scatterlist *sg;
328 int i;
329 u64 next_block_start;
330 u64 current_block_end;
331
332 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
333 /* Initialization - save the first chunk start as
334 the current_block_start - block means contiguous pages.
335 */
336 if (current_block_len == 0 && current_block_start == 0) {
337 first_block_start = current_block_start =
338 sg_dma_address(sg);
339 /* Find the bits that are different between
340 the physical address and the virtual
341 address for the start of the MR.
342 */
343 /* umem_get aligned the start_va to a page
344 boundry. Therefore, we need to align the
345 start va to the same boundry */
346 /* misalignment_bits is needed to handle the
347 case of a single memory region. In this
348 case, the rest of the logic will not reduce
349 the block size. If we use a block size
350 which is bigger than the alignment of the
351 misalignment bits, we might use the virtual
352 page number instead of the physical page
353 number, resulting in access to the wrong
354 data. */
355 misalignment_bits =
356 (start_va & (~(((u64)(umem->page_size))-1ULL)))
357 ^ current_block_start;
358 block_shift = min(alignment_of(misalignment_bits)
359 , block_shift);
360 }
361
362 /* Go over the scatter entries and check
363 if they continue the previous scatter entry.
364 */
365 next_block_start =
366 sg_dma_address(sg);
367 current_block_end = current_block_start
368 + current_block_len;
369 /* If we have a split (non-contig.) between two block*/
370 if (current_block_end != next_block_start) {
371 block_shift = mlx4_ib_umem_calc_block_mtt(
372 next_block_start,
373 current_block_end,
374 block_shift);
375
376 /* If we reached the minimum shift for 4k
377 page we stop the loop.
378 */
379 if (block_shift <= min_shift)
380 goto end;
381
382 /* If not saved yet we are in first block -
383 we save the length of first block to
384 calculate the non_aligned_pages number at
385 * the end.
386 */
387 total_len += current_block_len;
388
389 /* Start a new block */
390 current_block_start = next_block_start;
391 current_block_len =
392 sg_dma_len(sg);
393 continue;
394 }
395 /* The scatter entry is another part of
396 the current block, increase the block size
397 * An entry in the scatter can be larger than
398 4k (page) as of dma mapping
399 which merge some blocks together.
400 */
401 current_block_len +=
402 sg_dma_len(sg);
403 }
404
405 /* Account for the last block in the total len */
406 total_len += current_block_len;
407 /* Add to the first block the misalignment that it suffers from.*/
408 total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
409 last_block_end = current_block_start+current_block_len;
410 last_block_aligned_end = round_up(last_block_end, 1<<block_shift);
411 total_len += (last_block_aligned_end - last_block_end);
412
413 WARN((total_len & ((1ULL<<block_shift)-1ULL)),
414 " misaligned total length detected (%llu, %llu)!",
415 (unsigned long long)total_len, (unsigned long long)block_shift);
416
417 *num_of_mtts = total_len >> block_shift;
418 end:
419 if (block_shift < min_shift) {
420 /* If shift is less than the min we set a WARN and
421 return the min shift.
422 */
423 WARN(1,
424 "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
425 (unsigned long long)block_shift);
426
427 block_shift = min_shift;
428 }
429 return block_shift;
430
431 }
432
/* No support for Shared MR */
434 #if 0
435 static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
436 {
437
438 struct proc_dir_entry *mr_proc_entry;
439 mode_t mode = S_IFREG;
440 char name_buff[16];
441
442 mode |= convert_shared_access(access_flags);
443 sprintf(name_buff, "%X", mr_id);
444 mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
445 mr->smr_info->mr_id = mr_id;
446 mr->smr_info->umem = mr->umem;
447
448 mr_proc_entry = proc_create_data(name_buff, mode,
449 mlx4_mrs_dir_entry,
450 &shared_mr_proc_ops,
451 mr->smr_info);
452
453 if (!mr_proc_entry) {
454 pr_err("prepare_shared_mr failed via proc\n");
455 kfree(mr->smr_info);
456 return -ENODEV;
457 }
458
459 current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
460 mr_proc_entry->size = mr->umem->length;
461 return 0;
462
463 }
464 static int is_shared_mr(int access_flags)
465 {
466 /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or
467 other shared bits were turned on.
468 */
469 return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
470 IB_ACCESS_SHARED_MR_USER_WRITE |
471 IB_ACCESS_SHARED_MR_GROUP_READ |
472 IB_ACCESS_SHARED_MR_GROUP_WRITE |
473 IB_ACCESS_SHARED_MR_OTHER_READ |
474 IB_ACCESS_SHARED_MR_OTHER_WRITE));
475
476 }
477
478 static void free_smr_info(struct mlx4_ib_mr *mr)
479 {
480 /* When master/parent shared mr is dereged there is
481 no ability to share this mr any more - its mr_id will be
482 returned to the kernel as part of ib_uverbs_dereg_mr
483 and may be allocated again as part of other reg_mr.
484 */
485 char name_buff[16];
486
487 sprintf(name_buff, "%X", mr->smr_info->mr_id);
488 /* Remove proc entry is checking internally that no operation
489 was strated on that proc fs file and if in the middle
490 current process will wait till end of operation.
491 That's why no sync mechanism is needed when we release
492 below the shared umem.
493 */
494 remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
495 kfree(mr->smr_info);
496 mr->smr_info = NULL;
497 }
498 #endif
499
/*
 * Invalidation callback registered for umems backed by a peer memory
 * client.
 *
 * @invalidation_cookie: the mlx4_ib_mr that owns @umem.
 * @umem:                the umem being invalidated.
 * @addr, @size:         not used by this implementation.
 *
 * The first party to bump mr->invalidated tears the MR down.  If this
 * callback is not the first, it only flags an in-flight invalidation on the
 * umem's invalidation context and returns.  Otherwise it frees the hardware
 * MR, releases the umem and completes mr->invalidation_comp.
 */
static void mlx4_invalidate_umem(void *invalidation_cookie,
				 struct ib_umem *umem,
				 unsigned long addr, size_t size)
{
	struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie;

	/*
	 * This function is called under the client peer lock, so its
	 * resources are race protected.
	 */
	if (atomic_inc_return(&mr->invalidated) > 1) {
		/* Somebody else already started tearing this MR down. */
		umem->invalidation_ctx->inflight_invalidation = 1;
		return;
	}

	umem->invalidation_ctx->peer_callback = 1;
	mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr);
	ib_umem_release(umem);
	complete(&mr->invalidation_comp);
}
521
/*
 * Register a user memory region on protection domain @pd.
 *
 * @start, @length: user range handed to ib_umem_get_ex() to build the umem.
 * @virt_addr:      address passed to mlx4_mr_alloc() for the MR.
 * @access_flags:   IB_ACCESS_* flags; converted with convert_access().
 * @udata:          not used here.
 * @mr_id:          only used by the (compiled-out) shared MR support.
 *
 * The MTT page shift and entry count are chosen by
 * mlx4_ib_umem_calc_optimal_mtt_size(), the MTT is filled from the umem,
 * and the MR is enabled.  When the umem is backed by a peer memory client,
 * an invalidation notifier is armed so the peer can tear the MR down;
 * IB_ACCESS_MW_BIND is refused with -ENOSYS in that case.
 *
 * Returns the embedded ib_mr, or an ERR_PTR() with the failing step's
 * error.
 */
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;
	struct ib_peer_memory_client *ib_peer_mem;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
				  access_flags, 0, 1);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	ib_peer_mem = mr->umem->ib_peer_mem;

	/*
	 * n is pre-loaded with the plain page count because the shift
	 * calculation leaves it untouched when it stops early at the
	 * minimum shift; otherwise it is replaced by the MTT entry count.
	 */
	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
						   &n);
	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	/* No support for Shared MR */
#if 0
	/* Check whether MR should be shared */
	if (is_shared_mr(access_flags)) {
	/* start address and length must be aligned to page size in order
	    to map a full page and preventing leakage of data */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}
#endif

	/*
	 * Reset the invalidation counter BEFORE the notifier is armed.
	 * mlx4_invalidate_umem() increments it, so resetting it after
	 * activation could wipe out an invalidation that already ran and
	 * make a later dereg free the MR a second time.
	 */
	atomic_set(&mr->invalidated, 0);

	if (ib_peer_mem) {
		if (access_flags & IB_ACCESS_MW_BIND) {
			/* Prevent binding MW on peer clients.
			 * mlx4_invalidate_umem must be void,
			 * therefore, mlx4_mr_free should not fail
			 * when using peer clients. */
			err = -ENOSYS;
			pr_err("MW is not supported with peer memory client\n");
			goto err_smr;
		}
		init_completion(&mr->invalidation_comp);
		ib_umem_activate_invalidation_notifier(mr->umem,
					mlx4_invalidate_umem, mr);
	}

	return &mr->ibmr;

err_smr:
	/* No support for Shared MR */
#if 0
	if (mr->smr_info)
		free_smr_info(mr);
#endif
err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
614
mlx4_ib_dereg_mr(struct ib_mr * ibmr)615 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
616 {
617 struct mlx4_ib_mr *mr = to_mmr(ibmr);
618 struct ib_umem *umem = mr->umem;
619 int ret;
620
621 /* No suuport for Shared MR */
622 #if 0
623 if (mr->smr_info)
624 free_smr_info(mr);
625 #endif
626
627 if (atomic_inc_return(&mr->invalidated) > 1) {
628 wait_for_completion(&mr->invalidation_comp);
629 goto end;
630 }
631
632 ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
633 if (ret) {
634 /* Error is not expected here, except when memory windows
635 * are bound to MR which is not supported with
636 * peer memory clients */
637 atomic_set(&mr->invalidated, 0);
638 return ret;
639 }
640
641 if (!umem)
642 goto end;
643
644 ib_umem_release(mr->umem);
645 end:
646
647 kfree(mr);
648
649 return 0;
650 }
651
mlx4_ib_alloc_mw(struct ib_pd * pd,enum ib_mw_type type)652 struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
653 {
654 struct mlx4_ib_dev *dev = to_mdev(pd->device);
655 struct mlx4_ib_mw *mw;
656 int err;
657
658 mw = kmalloc(sizeof(*mw), GFP_KERNEL);
659 if (!mw)
660 return ERR_PTR(-ENOMEM);
661
662 err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw);
663 if (err)
664 goto err_free;
665
666 err = mlx4_mw_enable(dev->dev, &mw->mmw);
667 if (err)
668 goto err_mw;
669
670 mw->ibmw.rkey = mw->mmw.key;
671
672 return &mw->ibmw;
673
674 err_mw:
675 mlx4_mw_free(dev->dev, &mw->mmw);
676
677 err_free:
678 kfree(mw);
679
680 return ERR_PTR(err);
681 }
682
mlx4_ib_bind_mw(struct ib_qp * qp,struct ib_mw * mw,struct ib_mw_bind * mw_bind)683 int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
684 struct ib_mw_bind *mw_bind)
685 {
686 struct ib_send_wr wr;
687 struct ib_send_wr *bad_wr;
688 int ret;
689
690 memset(&wr, 0, sizeof(wr));
691 wr.opcode = IB_WR_BIND_MW;
692 wr.wr_id = mw_bind->wr_id;
693 wr.send_flags = mw_bind->send_flags;
694 wr.wr.bind_mw.mw = mw;
695 wr.wr.bind_mw.bind_info = mw_bind->bind_info;
696 wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);
697
698 ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
699 if (!ret)
700 mw->rkey = wr.wr.bind_mw.rkey;
701
702 return ret;
703 }
704
mlx4_ib_dealloc_mw(struct ib_mw * ibmw)705 int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
706 {
707 struct mlx4_ib_mw *mw = to_mmw(ibmw);
708
709 mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
710 kfree(mw);
711
712 return 0;
713 }
714
mlx4_ib_alloc_fast_reg_mr(struct ib_pd * pd,int max_page_list_len)715 struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
716 int max_page_list_len)
717 {
718 struct mlx4_ib_dev *dev = to_mdev(pd->device);
719 struct mlx4_ib_mr *mr;
720 int err;
721
722 mr = kzalloc(sizeof *mr, GFP_KERNEL);
723 if (!mr)
724 return ERR_PTR(-ENOMEM);
725
726 err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
727 max_page_list_len, 0, &mr->mmr);
728 if (err)
729 goto err_free;
730
731 err = mlx4_mr_enable(dev->dev, &mr->mmr);
732 if (err)
733 goto err_mr;
734
735 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
736 mr->umem = NULL;
737
738 return &mr->ibmr;
739
740 err_mr:
741 (void) mlx4_mr_free(dev->dev, &mr->mmr);
742
743 err_free:
744 kfree(mr);
745 return ERR_PTR(err);
746 }
747
mlx4_ib_alloc_fast_reg_page_list(struct ib_device * ibdev,int page_list_len)748 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
749 int page_list_len)
750 {
751 struct mlx4_ib_dev *dev = to_mdev(ibdev);
752 struct mlx4_ib_fast_reg_page_list *mfrpl;
753 int size = page_list_len * sizeof (u64);
754
755 if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
756 return ERR_PTR(-EINVAL);
757
758 mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
759 if (!mfrpl)
760 return ERR_PTR(-ENOMEM);
761
762 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
763 if (!mfrpl->ibfrpl.page_list)
764 goto err_free;
765
766 mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
767 size, &mfrpl->map,
768 GFP_KERNEL);
769 if (!mfrpl->mapped_page_list)
770 goto err_free;
771
772 WARN_ON(mfrpl->map & 0x3f);
773
774 return &mfrpl->ibfrpl;
775
776 err_free:
777 kfree(mfrpl->ibfrpl.page_list);
778 kfree(mfrpl);
779 return ERR_PTR(-ENOMEM);
780 }
781
mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list * page_list)782 void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
783 {
784 struct mlx4_ib_dev *dev = to_mdev(page_list->device);
785 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
786 int size = page_list->max_page_list_len * sizeof (u64);
787
788 dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
789 mfrpl->map);
790 kfree(mfrpl->ibfrpl.page_list);
791 kfree(mfrpl);
792 }
793
mlx4_ib_fmr_alloc(struct ib_pd * pd,int acc,struct ib_fmr_attr * fmr_attr)794 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
795 struct ib_fmr_attr *fmr_attr)
796 {
797 struct mlx4_ib_dev *dev = to_mdev(pd->device);
798 struct mlx4_ib_fmr *fmr;
799 int err = -ENOMEM;
800
801 fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
802 if (!fmr)
803 return ERR_PTR(-ENOMEM);
804
805 err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
806 fmr_attr->max_pages, fmr_attr->max_maps,
807 fmr_attr->page_shift, &fmr->mfmr);
808 if (err)
809 goto err_free;
810
811 err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
812 if (err)
813 goto err_mr;
814
815 fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
816
817 return &fmr->ibfmr;
818
819 err_mr:
820 (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
821
822 err_free:
823 kfree(fmr);
824
825 return ERR_PTR(err);
826 }
827
/*
 * Map @npages pages from @page_list into the FMR at I/O virtual address
 * @iova.  Thin wrapper around mlx4_map_phys_fmr(), which is also handed
 * the FMR's lkey and rkey fields; its return value is passed through.
 */
int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_dev *mdev = to_mdev(ifmr->ibfmr.device)->dev;

	return mlx4_map_phys_fmr(mdev, &ifmr->mfmr, page_list, npages,
				 iova, &ifmr->ibfmr.lkey,
				 &ifmr->ibfmr.rkey);
}
837
mlx4_ib_unmap_fmr(struct list_head * fmr_list)838 int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
839 {
840 struct ib_fmr *ibfmr;
841 int err;
842 struct mlx4_dev *mdev = NULL;
843
844 list_for_each_entry(ibfmr, fmr_list, list) {
845 if (mdev && to_mdev(ibfmr->device)->dev != mdev)
846 return -EINVAL;
847 mdev = to_mdev(ibfmr->device)->dev;
848 }
849
850 if (!mdev)
851 return 0;
852
853 list_for_each_entry(ibfmr, fmr_list, list) {
854 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
855
856 mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
857 }
858
859 /*
860 * Make sure all MPT status updates are visible before issuing
861 * SYNC_TPT firmware command.
862 */
863 wmb();
864
865 err = mlx4_SYNC_TPT(mdev);
866 if (err)
867 pr_warn("SYNC_TPT error %d when "
868 "unmapping FMRs\n", err);
869
870 return 0;
871 }
872
mlx4_ib_fmr_dealloc(struct ib_fmr * ibfmr)873 int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
874 {
875 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
876 struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
877 int err;
878
879 err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
880
881 if (!err)
882 kfree(ifmr);
883
884 return err;
885 }
886