1 /*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #include <linux/slab.h>
35 #include <linux/module.h>
36 #include <linux/sched.h>
37
38 #ifdef __linux__
39 #include <linux/proc_fs.h>
40 #include <linux/cred.h>
41 #endif
42
43 #include "mlx4_ib.h"
44
convert_access(int acc)45 static u32 convert_access(int acc)
46 {
47 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
48 (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
49 (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
50 (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
51 MLX4_PERM_LOCAL_READ;
52 }
53 #ifdef __linux__
shared_mr_proc_read(struct file * file,char __user * buffer,size_t len,loff_t * offset)54 static ssize_t shared_mr_proc_read(struct file *file,
55 char __user *buffer,
56 size_t len,
57 loff_t *offset)
58 {
59
60 return -ENOSYS;
61
62 }
63
shared_mr_proc_write(struct file * file,const char __user * buffer,size_t len,loff_t * offset)64 static ssize_t shared_mr_proc_write(struct file *file,
65 const char __user *buffer,
66 size_t len,
67 loff_t *offset)
68 {
69
70 return -ENOSYS;
71 }
72
shared_mr_mmap(struct file * filep,struct vm_area_struct * vma)73 static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
74 {
75
76 struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
77 struct mlx4_shared_mr_info *smr_info =
78 (struct mlx4_shared_mr_info *)pde->data;
79
80 /* Prevent any mapping not on start of area */
81 if (vma->vm_pgoff != 0)
82 return -EINVAL;
83
84 return ib_umem_map_to_vma(smr_info->umem,
85 vma);
86
87 }
88
89 static const struct file_operations shared_mr_proc_ops = {
90 .owner = THIS_MODULE,
91 .read = shared_mr_proc_read,
92 .write = shared_mr_proc_write,
93 .mmap = shared_mr_mmap
94 };
95
convert_shared_access(int acc)96 static mode_t convert_shared_access(int acc)
97 {
98
99 return (acc & IB_ACCESS_SHARED_MR_USER_READ ? S_IRUSR : 0) |
100 (acc & IB_ACCESS_SHARED_MR_USER_WRITE ? S_IWUSR : 0) |
101 (acc & IB_ACCESS_SHARED_MR_GROUP_READ ? S_IRGRP : 0) |
102 (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
103 (acc & IB_ACCESS_SHARED_MR_OTHER_READ ? S_IROTH : 0) |
104 (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
105
106 }
107 #endif
mlx4_ib_get_dma_mr(struct ib_pd * pd,int acc)108 struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
109 {
110 struct mlx4_ib_mr *mr;
111 int err;
112
113 mr = kzalloc(sizeof *mr, GFP_KERNEL);
114 if (!mr)
115 return ERR_PTR(-ENOMEM);
116
117 err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
118 ~0ull, convert_access(acc), 0, 0, &mr->mmr);
119 if (err)
120 goto err_free;
121
122 err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
123 if (err)
124 goto err_mr;
125
126 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
127 mr->umem = NULL;
128
129 return &mr->ibmr;
130
131 err_mr:
132 mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
133
134 err_free:
135 kfree(mr);
136
137 return ERR_PTR(err);
138 }
139
/*
 * Emit the MTT entries that cover one physically contiguous block.
 *
 * @dev:            device whose mlx4_dev is passed to mlx4_write_mtt()
 * @mtt:            MTT being filled
 * @mtt_size:       bytes covered by one MTT entry (callers pass
 *                  1 << @mtt_shift)
 * @mtt_shift:      log2 of @mtt_size
 * @len:            length of the block in bytes
 * @cur_start_addr: DMA address at which the block starts
 * @pages:          staging buffer for entry addresses; it is flushed with
 *                  mlx4_write_mtt() each time it holds
 *                  PAGE_SIZE / sizeof(u64) entries
 * @start_index:    in/out, MTT index of the first entry staged in @pages
 * @npages:         in/out, number of entries currently staged in @pages
 *
 * The block is widened to @mtt_size boundaries on both sides before it is
 * split into entries, so a block that starts or ends in the middle of an
 * MTT record is still fully covered.  Entries left in @pages on return are
 * NOT written; the caller has to flush them.
 *
 * Returns 0 on success, -EINVAL if the widened length is still not a
 * multiple of @mtt_size, or the error returned by mlx4_write_mtt().
 */
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	/*
	 * The entry counter is 64 bit like the entry count it is compared
	 * against; a plain int could overflow on a very large block.
	 */
	u64 k;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned;
	int err;

	/* Account for the bytes between the aligned start and the start. */
	len += (cur_start_addr & (mtt_size - 1ULL));
	/* Account for the bytes between the end and the aligned end. */
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     (unsigned long long)len, (unsigned long long)mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to mtt_size.  Required to handle an
	 * MR that starts in the middle of an MTT record.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);

	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and
		 * pass it chunks of appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}
201
mlx4_ib_umem_write_mtt(struct mlx4_ib_dev * dev,struct mlx4_mtt * mtt,struct ib_umem * umem)202 int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
203 struct ib_umem *umem)
204 {
205 u64 *pages;
206 struct ib_umem_chunk *chunk;
207 int j;
208 u64 len = 0;
209 int err = 0;
210 u64 mtt_size;
211 u64 cur_start_addr = 0;
212 u64 mtt_shift;
213 int start_index = 0;
214 int npages = 0;
215
216 pages = (u64 *) __get_free_page(GFP_KERNEL);
217 if (!pages)
218 return -ENOMEM;
219
220 mtt_shift = mtt->page_shift;
221 mtt_size = 1ULL << mtt_shift;
222
223 list_for_each_entry(chunk, &umem->chunk_list, list)
224 for (j = 0; j < chunk->nmap; ++j) {
225 if (cur_start_addr + len ==
226 sg_dma_address(&chunk->page_list[j])) {
227 /* still the same block */
228 len += sg_dma_len(&chunk->page_list[j]);
229 continue;
230 }
231 /* A new block is started ...*/
232 /* If len is malaligned, write an extra mtt entry to
233 cover the misaligned area (round up the division)
234 */
235 err = mlx4_ib_umem_write_mtt_block(dev,
236 mtt, mtt_size, mtt_shift,
237 len, cur_start_addr,
238 pages,
239 &start_index,
240 &npages);
241 if (err)
242 goto out;
243
244 cur_start_addr =
245 sg_dma_address(&chunk->page_list[j]);
246 len = sg_dma_len(&chunk->page_list[j]);
247 }
248
249 /* Handle the last block */
250 if (len > 0) {
251 /* If len is malaligned, write an extra mtt entry to cover
252 the misaligned area (round up the division)
253 */
254 err = mlx4_ib_umem_write_mtt_block(dev,
255 mtt, mtt_size, mtt_shift,
256 len, cur_start_addr,
257 pages,
258 &start_index,
259 &npages);
260 if (err)
261 goto out;
262 }
263
264
265 if (npages)
266 err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
267
268 out:
269 free_page((unsigned long) pages);
270 return err;
271 }
272
/*
 * Return the bit position of the lowest set bit of @ptr, i.e. the log2 of
 * the largest power of two that divides it.
 *
 * NOTE(review): for @ptr == 0 the isolated bit is 0 and the result is
 * whatever ilog2(0) yields - confirm callers never rely on that case.
 */
static inline u64 alignment_of(u64 ptr)
{
	/* ptr & ~(ptr - 1) keeps only the least significant set bit. */
	u64 lowest_set_bit = ptr & (~(ptr - 1));

	return ilog2(lowest_set_bit);
}
277
/*
 * Reduce @block_shift so that both sides of a gap between two contiguous
 * blocks are aligned to 1 << shift.
 *
 * @next_block_start:  address at which the new block starts
 * @current_block_end: address at which the previous block ends
 * @block_shift:       shift that was valid up to this gap
 *
 * Returns the (possibly smaller) shift.
 */
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	u64 shift = block_shift;

	/*
	 * The start of the new block must have its low 'shift' bits clear.
	 * If it does not, fall back to the alignment the address really
	 * has, which is the position of its lowest set bit.
	 */
	if (next_block_start & ((1ULL << shift) - 1ULL))
		shift = alignment_of(next_block_start);

	/*
	 * Same check for the end of the previous block, against the shift
	 * that may just have been reduced.
	 */
	if (current_block_end & ((1ULL << shift) - 1ULL))
		shift = alignment_of(current_block_end);

	return shift;
}
308
309 /* Calculate optimal mtt size based on contiguous pages.
310 * Function will return also the number of pages that are not aligned to the
311 calculated mtt_size to be added to total number
312 of pages. For that we should check the first chunk length & last chunk
313 length and if not aligned to mtt_size we should increment
314 the non_aligned_pages number.
315 All chunks in the middle already handled as part of mtt shift calculation
316 for both their start & end addresses.
317 */
mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem * umem,u64 start_va,int * num_of_mtts)318 int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
319 u64 start_va,
320 int *num_of_mtts)
321 {
322 struct ib_umem_chunk *chunk;
323 int j;
324 u64 block_shift = MLX4_MAX_MTT_SHIFT;
325 u64 current_block_len = 0;
326 u64 current_block_start = 0;
327 u64 misalignment_bits;
328 u64 first_block_start = 0;
329 u64 last_block_end = 0;
330 u64 total_len = 0;
331 u64 last_block_aligned_end = 0;
332 u64 min_shift = ilog2(umem->page_size);
333
334 list_for_each_entry(chunk, &umem->chunk_list, list) {
335 /* Initialization - save the first chunk start as
336 the current_block_start - block means contiguous pages.
337 */
338 if (current_block_len == 0 && current_block_start == 0) {
339 first_block_start = current_block_start =
340 sg_dma_address(&chunk->page_list[0]);
341 /* Find the bits that are different between
342 the physical address and the virtual
343 address for the start of the MR.
344 */
345 /* umem_get aligned the start_va to a page
346 boundry. Therefore, we need to align the
347 start va to the same boundry */
348 /* misalignment_bits is needed to handle the
349 case of a single memory region. In this
350 case, the rest of the logic will not reduce
351 the block size. If we use a block size
352 which is bigger than the alignment of the
353 misalignment bits, we might use the virtual
354 page number instead of the physical page
355 number, resulting in access to the wrong
356 data. */
357 misalignment_bits =
358 (start_va & (~(((u64)(umem->page_size))-1ULL)))
359 ^ current_block_start;
360 block_shift = min(alignment_of(misalignment_bits)
361 , block_shift);
362 }
363
364 /* Go over the scatter entries in the current chunk, check
365 if they continue the previous scatter entry.
366 */
367 for (j = 0; j < chunk->nmap; ++j) {
368 u64 next_block_start =
369 sg_dma_address(&chunk->page_list[j]);
370 u64 current_block_end = current_block_start
371 + current_block_len;
372 /* If we have a split (non-contig.) between two block*/
373 if (current_block_end != next_block_start) {
374 block_shift = mlx4_ib_umem_calc_block_mtt(
375 next_block_start,
376 current_block_end,
377 block_shift);
378
379 /* If we reached the minimum shift for 4k
380 page we stop the loop.
381 */
382 if (block_shift <= min_shift)
383 goto end;
384
385 /* If not saved yet we are in first block -
386 we save the length of first block to
387 calculate the non_aligned_pages number at
388 * the end.
389 */
390 total_len += current_block_len;
391
392 /* Start a new block */
393 current_block_start = next_block_start;
394 current_block_len =
395 sg_dma_len(&chunk->page_list[j]);
396 continue;
397 }
398 /* The scatter entry is another part of
399 the current block, increase the block size
400 * An entry in the scatter can be larger than
401 4k (page) as of dma mapping
402 which merge some blocks together.
403 */
404 current_block_len +=
405 sg_dma_len(&chunk->page_list[j]);
406 }
407 }
408
409 /* Account for the last block in the total len */
410 total_len += current_block_len;
411 /* Add to the first block the misalignment that it suffers from.*/
412 total_len += (first_block_start & ((1ULL<<block_shift)-1ULL));
413 last_block_end = current_block_start+current_block_len;
414 last_block_aligned_end = round_up(last_block_end, 1<<block_shift);
415 total_len += (last_block_aligned_end - last_block_end);
416
417 WARN((total_len & ((1ULL<<block_shift)-1ULL)),
418 " misaligned total length detected (%llu, %llu)!",
419 (long long)total_len, (long long)block_shift);
420
421 *num_of_mtts = total_len >> block_shift;
422 end:
423 if (block_shift < min_shift) {
424 /* If shift is less than the min we set a WARN and
425 return the min shift.
426 */
427 WARN(1,
428 "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
429 (long long)block_shift);
430
431 block_shift = min_shift;
432 }
433 return block_shift;
434 }
435
436 #ifdef __linux__
prepare_shared_mr(struct mlx4_ib_mr * mr,int access_flags,int mr_id)437 static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
438 {
439 struct proc_dir_entry *mr_proc_entry;
440 mode_t mode = S_IFREG;
441 char name_buff[16];
442
443 mode |= convert_shared_access(access_flags);
444 sprintf(name_buff, "%X", mr_id);
445 mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
446 mr->smr_info->mr_id = mr_id;
447 mr->smr_info->umem = mr->umem;
448
449 mr_proc_entry = proc_create_data(name_buff, mode,
450 mlx4_mrs_dir_entry,
451 &shared_mr_proc_ops,
452 mr->smr_info);
453
454 if (!mr_proc_entry) {
455 pr_err("prepare_shared_mr failed via proc\n");
456 kfree(mr->smr_info);
457 return -ENODEV;
458 }
459
460 current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
461 mr_proc_entry->size = mr->umem->length;
462 return 0;
463
464 }
is_shared_mr(int access_flags)465 static int is_shared_mr(int access_flags)
466 {
467 /* We should check whether IB_ACCESS_SHARED_MR_USER_READ or
468 other shared bits were turned on.
469 */
470 return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
471 IB_ACCESS_SHARED_MR_USER_WRITE |
472 IB_ACCESS_SHARED_MR_GROUP_READ |
473 IB_ACCESS_SHARED_MR_GROUP_WRITE |
474 IB_ACCESS_SHARED_MR_OTHER_READ |
475 IB_ACCESS_SHARED_MR_OTHER_WRITE));
476
477 }
478 #endif
479
/*
 * Register a user memory region.
 *
 * @pd:           protection domain the MR belongs to
 * @start:        start address handed to ib_umem_get()
 * @length:       length of the region in bytes
 * @virt_addr:    virtual address handed to mlx4_mr_alloc()
 * @access_flags: IB access flags; IB_ACCESS_SHARED_MR_* bits additionally
 *                request a shared MR (only under __linux__)
 * @udata:        not used by this function
 * @mr_id:        id used to name the proc entry of a shared MR
 *
 * Steps: get the umem, pick the MTT page shift and entry count, allocate
 * the hardware MR, write its MTT entries and enable it.  For a shared MR
 * the umem offset must be 0 and @length a multiple of the page size, so
 * that only full pages get mapped and no unrelated data leaks.
 *
 * Returns the embedded ib_mr, or an ERR_PTR() after releasing everything
 * that was acquired.
 */
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int mtt_shift;
	int num_mtts;
	int rc;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (mr == NULL)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(pd->uobject->context, start, length,
			       access_flags, 0);
	if (IS_ERR(mr->umem)) {
		rc = PTR_ERR(mr->umem);
		goto free_mr_struct;
	}

	/*
	 * Start from the plain page count; the optimal-size calculation
	 * replaces it unless it stops early.
	 */
	num_mtts = ib_umem_page_count(mr->umem);
	mtt_shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start,
						       &num_mtts);

	rc = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			   convert_access(access_flags), num_mtts, mtt_shift,
			   &mr->mmr);
	if (rc)
		goto release_umem;

	rc = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (rc)
		goto free_hw_mr;

	rc = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (rc)
		goto free_hw_mr;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->ibmr.lkey;
#ifdef __linux__
	/* Check whether MR should be shared */
	if (is_shared_mr(access_flags)) {
		/*
		 * Start address and length must be page aligned so that
		 * only full pages are mapped, preventing leakage of data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			rc = -EINVAL;
			goto free_hw_mr;
		}

		rc = prepare_shared_mr(mr, access_flags, mr_id);
		if (rc)
			goto free_hw_mr;
	}
#endif
	return &mr->ibmr;

free_hw_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

release_umem:
	ib_umem_release(mr->umem);

free_mr_struct:
	kfree(mr);

	return ERR_PTR(rc);
}
547
548
mlx4_ib_dereg_mr(struct ib_mr * ibmr)549 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
550 {
551 struct mlx4_ib_mr *mr = to_mmr(ibmr);
552
553 mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
554 if (mr->smr_info) {
555 /* When master/parent shared mr is dereged there is
556 no ability to share this mr any more - its mr_id will be
557 returned to the kernel as part of ib_uverbs_dereg_mr
558 and may be allocated again as part of other reg_mr.
559 */
560 char name_buff[16];
561
562 sprintf(name_buff, "%X", mr->smr_info->mr_id);
563 /* Remove proc entry is checking internally that no operation
564 was strated on that proc fs file and if in the middle
565 current process will wait till end of operation.
566 That's why no sync mechanism is needed when we release
567 below the shared umem.
568 */
569 #ifdef __linux__
570 remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
571 kfree(mr->smr_info);
572 #endif
573 }
574
575 if (mr->umem)
576 ib_umem_release(mr->umem);
577
578 kfree(mr);
579
580 return 0;
581 }
582
mlx4_ib_alloc_fast_reg_mr(struct ib_pd * pd,int max_page_list_len)583 struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
584 int max_page_list_len)
585 {
586 struct mlx4_ib_dev *dev = to_mdev(pd->device);
587 struct mlx4_ib_mr *mr;
588 int err;
589
590 mr = kzalloc(sizeof *mr, GFP_KERNEL);
591 if (!mr)
592 return ERR_PTR(-ENOMEM);
593
594 err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
595 max_page_list_len, 0, &mr->mmr);
596 if (err)
597 goto err_free;
598
599 err = mlx4_mr_enable(dev->dev, &mr->mmr);
600 if (err)
601 goto err_mr;
602
603 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
604 mr->umem = NULL;
605
606 return &mr->ibmr;
607
608 err_mr:
609 mlx4_mr_free(dev->dev, &mr->mmr);
610
611 err_free:
612 kfree(mr);
613 return ERR_PTR(err);
614 }
615
mlx4_ib_alloc_fast_reg_page_list(struct ib_device * ibdev,int page_list_len)616 struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
617 int page_list_len)
618 {
619 struct mlx4_ib_dev *dev = to_mdev(ibdev);
620 struct mlx4_ib_fast_reg_page_list *mfrpl;
621 int size = page_list_len * sizeof (u64);
622
623 if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
624 return ERR_PTR(-EINVAL);
625
626 mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
627 if (!mfrpl)
628 return ERR_PTR(-ENOMEM);
629
630 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
631 if (!mfrpl->ibfrpl.page_list)
632 goto err_free;
633
634 mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
635 size, &mfrpl->map,
636 GFP_KERNEL);
637 if (!mfrpl->mapped_page_list)
638 goto err_free;
639
640 WARN_ON(mfrpl->map & 0x3f);
641
642 return &mfrpl->ibfrpl;
643
644 err_free:
645 kfree(mfrpl->ibfrpl.page_list);
646 kfree(mfrpl);
647 return ERR_PTR(-ENOMEM);
648 }
649
mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list * page_list)650 void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
651 {
652 struct mlx4_ib_dev *dev = to_mdev(page_list->device);
653 struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
654 int size = page_list->max_page_list_len * sizeof (u64);
655
656 dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
657 mfrpl->map);
658 kfree(mfrpl->ibfrpl.page_list);
659 kfree(mfrpl);
660 }
661
mlx4_ib_fmr_alloc(struct ib_pd * pd,int acc,struct ib_fmr_attr * fmr_attr)662 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
663 struct ib_fmr_attr *fmr_attr)
664 {
665 struct mlx4_ib_dev *dev = to_mdev(pd->device);
666 struct mlx4_ib_fmr *fmr;
667 int err = -ENOMEM;
668
669 fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
670 if (!fmr)
671 return ERR_PTR(-ENOMEM);
672
673 err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
674 fmr_attr->max_pages, fmr_attr->max_maps,
675 fmr_attr->page_shift, &fmr->mfmr);
676 if (err)
677 goto err_free;
678
679 err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
680 if (err)
681 goto err_mr;
682
683 fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;
684
685 return &fmr->ibfmr;
686
687 err_mr:
688 mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);
689
690 err_free:
691 kfree(fmr);
692
693 return ERR_PTR(err);
694 }
695
/*
 * Map @npages physical pages from @page_list at I/O virtual address @iova
 * through the FMR @ibfmr.  Thin wrapper around mlx4_map_phys_fmr(), which
 * also receives pointers to the FMR's lkey and rkey.
 *
 * Returns the result of mlx4_map_phys_fmr().
 */
int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_dev *mdev = to_mdev(ifmr->ibfmr.device)->dev;
	int rc;

	rc = mlx4_map_phys_fmr(mdev, &ifmr->mfmr, page_list, npages, iova,
			       &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	return rc;
}
705
mlx4_ib_unmap_fmr(struct list_head * fmr_list)706 int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
707 {
708 struct ib_fmr *ibfmr;
709 int err;
710 struct mlx4_dev *mdev = NULL;
711
712 list_for_each_entry(ibfmr, fmr_list, list) {
713 if (mdev && to_mdev(ibfmr->device)->dev != mdev)
714 return -EINVAL;
715 mdev = to_mdev(ibfmr->device)->dev;
716 }
717
718 if (!mdev)
719 return 0;
720
721 list_for_each_entry(ibfmr, fmr_list, list) {
722 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
723
724 mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
725 }
726
727 /*
728 * Make sure all MPT status updates are visible before issuing
729 * SYNC_TPT firmware command.
730 */
731 wmb();
732
733 err = mlx4_SYNC_TPT(mdev);
734 if (err)
735 pr_warn("SYNC_TPT error %d when "
736 "unmapping FMRs\n", err);
737
738 return 0;
739 }
740
mlx4_ib_fmr_dealloc(struct ib_fmr * ibfmr)741 int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
742 {
743 struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
744 struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
745 int err;
746
747 err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
748
749 if (!err)
750 kfree(ifmr);
751
752 return err;
753 }
754