1 /** $MirOS: src/sys/uvm/uvm_mmap.c,v 1.5 2014/07/13 12:35:45 tg Exp $ */
2 /* $OpenBSD: uvm_mmap.c,v 1.55 2005/01/15 06:54:51 otto Exp $ */
3 /* +OpenBSD: uvm_mmap.c,v 1.91 2012/07/21 06:46:58 matthew Exp $ */
4 /* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
5
6 /*
7 * Copyright (c) 1997 Charles D. Cranor and Washington University.
8 * Copyright (c) 1991, 1993 The Regents of the University of California.
9 * Copyright (c) 1988 University of Utah.
10 *
11 * All rights reserved.
12 *
13 * This code is derived from software contributed to Berkeley by
14 * the Systems Programming Group of the University of Utah Computer
15 * Science Department.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. All advertising materials mentioning features or use of this software
26 * must display the following acknowledgement:
27 * This product includes software developed by the Charles D. Cranor,
28 * Washington University, University of California, Berkeley and
29 * its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44 * SUCH DAMAGE.
45 *
46 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
47 * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
48 * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
49 */
50
51 /*
52 * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
53 * function.
54 */
55 #include <sys/param.h>
56 #include <sys/systm.h>
57 #include <sys/file.h>
58 #include <sys/filedesc.h>
59 #include <sys/resourcevar.h>
60 #include <sys/mman.h>
61 #include <sys/mount.h>
62 #include <sys/proc.h>
63 #include <sys/malloc.h>
64 #include <sys/vnode.h>
65 #include <sys/conf.h>
66 #include <sys/stat.h>
67
68 #include <machine/exec.h> /* for __LDPGSZ */
69
70 #include <miscfs/specfs/specdev.h>
71
72 #include <sys/syscallargs.h>
73
74 #include <uvm/uvm.h>
75 #include <uvm/uvm_device.h>
76 #include <uvm/uvm_vnode.h>
77
78
79 /*
80 * unimplemented VM system calls:
81 */
82
83 /*
84 * sys_mquery: provide mapping hints to applications that do fixed mappings
85 *
86 * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
87 * don't care about PMAP_PREFER or such)
88 * addr: hint where we'd like to place the mapping.
89 * size: size of the mapping
90 * fd: fd of the file we want to map
91 * off: offset within the file
92 */
93
94 int
sys_mquery(p,v,retval)95 sys_mquery(p, v, retval)
96 struct proc *p;
97 void *v;
98 register_t *retval;
99 {
100 struct sys_mquery_args /* {
101 syscallarg(void *) addr;
102 syscallarg(size_t) len;
103 syscallarg(int) prot;
104 syscallarg(int) flags;
105 syscallarg(int) fd;
106 syscallarg(long) pad;
107 syscallarg(off_t) pos;
108 } */ *uap = v;
109 struct file *fp;
110 struct uvm_object *uobj;
111 voff_t uoff;
112 int error;
113 vaddr_t vaddr;
114 int flags = 0;
115 vsize_t size;
116 vm_prot_t prot;
117 int fd;
118
119 vaddr = (vaddr_t) SCARG(uap, addr);
120 prot = SCARG(uap, prot);
121 size = (vsize_t) SCARG(uap, len);
122 fd = SCARG(uap, fd);
123
124 if ((prot & VM_PROT_ALL) != prot)
125 return (EINVAL);
126
127 if (SCARG(uap, flags) & MAP_FIXED)
128 flags |= UVM_FLAG_FIXED;
129
130 if (fd >= 0) {
131 if ((error = getvnode(p->p_fd, fd, &fp)) != 0)
132 return (error);
133 uobj = &((struct vnode *)fp->f_data)->v_uvm.u_obj;
134 uoff = SCARG(uap, pos);
135 } else {
136 fp = NULL;
137 uobj = NULL;
138 uoff = 0;
139 }
140
141 if (vaddr == 0)
142 vaddr = uvm_map_hint(p, prot);
143
144 /* prevent a user requested address from falling in heap space */
145 if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr) &&
146 (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
147 if (flags & UVM_FLAG_FIXED) {
148 error = EINVAL;
149 goto done;
150 }
151 vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ);
152 }
153 again:
154
155 if (uvm_map_findspace(&p->p_vmspace->vm_map, vaddr, size,
156 &vaddr, uobj, uoff, 0, flags) == NULL) {
157 if (flags & UVM_FLAG_FIXED)
158 error = EINVAL;
159 else
160 error = ENOMEM;
161 } else {
162 /* prevent a returned address from falling in heap space */
163 if ((vaddr + size > (vaddr_t)p->p_vmspace->vm_daddr)
164 && (vaddr < (vaddr_t)p->p_vmspace->vm_daddr + MAXDSIZ)) {
165 vaddr = round_page((vaddr_t)p->p_vmspace->vm_daddr +
166 MAXDSIZ);
167 goto again;
168 }
169 error = 0;
170 *retval = (register_t)(vaddr);
171 }
172 done:
173 if (fp != NULL)
174 FRELE(fp);
175 return (error);
176 }
177
178 /*
179 * sys_mincore: determine if pages are in core or not.
180 */
181
182 /* ARGSUSED */
183 int
sys_mincore(p,v,retval)184 sys_mincore(p, v, retval)
185 struct proc *p;
186 void *v;
187 register_t *retval;
188 {
189 struct sys_mincore_args /* {
190 syscallarg(void *) addr;
191 syscallarg(size_t) len;
192 syscallarg(char *) vec;
193 } */ *uap = v;
194 vm_page_t m;
195 char *vec, pgi;
196 struct uvm_object *uobj;
197 struct vm_amap *amap;
198 struct vm_anon *anon;
199 vm_map_entry_t entry;
200 vaddr_t start, end, lim;
201 vm_map_t map;
202 vsize_t len;
203 int error = 0, npgs;
204
205 map = &p->p_vmspace->vm_map;
206
207 start = (vaddr_t)SCARG(uap, addr);
208 len = SCARG(uap, len);
209 vec = SCARG(uap, vec);
210
211 if (start & PAGE_MASK)
212 return (EINVAL);
213 len = round_page(len);
214 end = start + len;
215 if (end <= start)
216 return (EINVAL);
217
218 npgs = len >> PAGE_SHIFT;
219
220 /*
221 * Lock down vec, so our returned status isn't outdated by
222 * storing the status byte for a page.
223 */
224 if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
225 return (error);
226
227 vm_map_lock_read(map);
228
229 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
230 error = ENOMEM;
231 goto out;
232 }
233
234 for (/* nothing */;
235 entry != &map->header && entry->start < end;
236 entry = entry->next) {
237 KASSERT(!UVM_ET_ISSUBMAP(entry));
238 KASSERT(start >= entry->start);
239
240 /* Make sure there are no holes. */
241 if (entry->end < end &&
242 (entry->next == &map->header ||
243 entry->next->start > entry->end)) {
244 error = ENOMEM;
245 goto out;
246 }
247
248 lim = end < entry->end ? end : entry->end;
249
250 /*
251 * Special case for objects with no "real" pages. Those
252 * are always considered resident (mapped devices).
253 */
254 if (UVM_ET_ISOBJ(entry)) {
255 KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
256 if (entry->object.uvm_obj->pgops->pgo_releasepg
257 == NULL) {
258 pgi = 1;
259 for (/* nothing */; start < lim;
260 start += PAGE_SIZE, vec++)
261 copyout(&pgi, vec, sizeof(char));
262 continue;
263 }
264 }
265
266 amap = entry->aref.ar_amap; /* top layer */
267 uobj = entry->object.uvm_obj; /* bottom layer */
268
269 if (amap != NULL)
270 amap_lock(amap);
271 if (uobj != NULL)
272 simple_lock(&uobj->vmobjlock);
273
274 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
275 pgi = 0;
276 if (amap != NULL) {
277 /* Check the top layer first. */
278 anon = amap_lookup(&entry->aref,
279 start - entry->start);
280 /* Don't need to lock anon here. */
281 if (anon != NULL && anon->u.an_page != NULL) {
282 /*
283 * Anon has the page for this entry
284 * offset.
285 */
286 pgi = 1;
287 }
288 }
289
290 if (uobj != NULL && pgi == 0) {
291 /* Check the bottom layer. */
292 m = uvm_pagelookup(uobj,
293 entry->offset + (start - entry->start));
294 if (m != NULL) {
295 /*
296 * Object has the page for this entry
297 * offset.
298 */
299 pgi = 1;
300 }
301 }
302
303 copyout(&pgi, vec, sizeof(char));
304 }
305
306 if (uobj != NULL)
307 simple_unlock(&uobj->vmobjlock);
308 if (amap != NULL)
309 amap_unlock(amap);
310 }
311
312 out:
313 vm_map_unlock_read(map);
314 uvm_vsunlock(p, SCARG(uap, vec), npgs);
315 return (error);
316 }
317
318 /*
319 * sys_mmap: mmap system call.
320 *
321 * => file offset and address may not be page aligned
322 * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE
323 * - if address isn't page aligned the mapping starts at trunc_page(addr)
324 * and the return value is adjusted up by the page offset.
325 */
326
327 int
sys_mmap(p,v,retval)328 sys_mmap(p, v, retval)
329 struct proc *p;
330 void *v;
331 register_t *retval;
332 {
333 struct sys_mmap_args /* {
334 syscallarg(void *) addr;
335 syscallarg(size_t) len;
336 syscallarg(int) prot;
337 syscallarg(int) flags;
338 syscallarg(int) fd;
339 syscallarg(long) pad;
340 syscallarg(off_t) pos;
341 } */ *uap = v;
342 vaddr_t addr;
343 struct vattr va;
344 off_t pos;
345 vsize_t size, pageoff;
346 vm_prot_t prot, maxprot;
347 int flags, fd;
348 vaddr_t vm_min_address = VM_MIN_ADDRESS;
349 struct filedesc *fdp = p->p_fd;
350 struct file *fp = NULL;
351 struct vnode *vp;
352 caddr_t handle;
353 int error;
354
355 /*
356 * first, extract syscall args from the uap.
357 */
358
359 addr = (vaddr_t) SCARG(uap, addr);
360 size = (vsize_t) SCARG(uap, len);
361 prot = SCARG(uap, prot);
362 flags = SCARG(uap, flags);
363 fd = SCARG(uap, fd);
364 pos = SCARG(uap, pos);
365
366 /*
367 * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
368 * validate the flags.
369 */
370 if ((prot & VM_PROT_ALL) != prot)
371 return (EINVAL);
372 if ((flags & MAP_FLAGMASK) != flags)
373 return (EINVAL);
374 if (flags & MAP_COPY)
375 flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
376 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
377 return (EINVAL);
378 if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
379 return (EINVAL);
380
381 /*
382 * align file position and save offset. adjust size.
383 */
384
385 pageoff = (pos & PAGE_MASK);
386 pos -= pageoff;
387 size += pageoff; /* add offset */
388 if (size != 0) {
389 size = (vsize_t) round_page(size); /* round up */
390 if (size == 0)
391 return (ENOMEM); /* don't allow wrap */
392 }
393
394 /*
395 * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
396 */
397
398 if (flags & MAP_FIXED) {
399
400 /* ensure address and file offset are aligned properly */
401 addr -= pageoff;
402 if (addr & PAGE_MASK)
403 return (EINVAL);
404
405 if (VM_MAXUSER_ADDRESS > 0 &&
406 (addr + size) > VM_MAXUSER_ADDRESS)
407 return (EINVAL);
408 if (vm_min_address > 0 && addr < vm_min_address)
409 return (EINVAL);
410 if (addr > addr + size)
411 return (EINVAL); /* no wrapping! */
412
413 } else {
414
415 /*
416 * not fixed: make sure we skip over the largest possible heap.
417 * we will refine our guess later (e.g. to account for VAC, etc)
418 */
419 if (addr == 0)
420 addr = uvm_map_hint(p, prot);
421 else if (!(flags & MAP_TRYFIXED) &&
422 addr < (vaddr_t)p->p_vmspace->vm_daddr)
423 addr = uvm_map_hint(p, prot);
424 }
425
426 /*
427 * check for file mappings (i.e. not anonymous) and verify file.
428 */
429 if ((flags & MAP_ANON) == 0) {
430
431 if ((fp = fd_getfile(fdp, fd)) == NULL)
432 return (EBADF);
433
434 FREF(fp);
435
436 if (fp->f_type != DTYPE_VNODE) {
437 error = ENODEV; /* only mmap vnodes! */
438 goto out;
439 }
440 vp = (struct vnode *)fp->f_data; /* convert to vnode */
441
442 if (vp->v_type != VREG && vp->v_type != VCHR &&
443 vp->v_type != VBLK) {
444 error = ENODEV; /* only REG/CHR/BLK support mmap */
445 goto out;
446 }
447
448 if (vp->v_type == VREG && (pos + size) < pos) {
449 error = EINVAL; /* no offset wrapping */
450 goto out;
451 }
452
453 /* special case: catch SunOS style /dev/zero */
454 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
455 flags |= MAP_ANON;
456 FRELE(fp);
457 fp = NULL;
458 goto is_anon;
459 }
460
461 /*
462 * Old programs may not select a specific sharing type, so
463 * default to an appropriate one.
464 *
465 * XXX: how does MAP_ANON fit in the picture?
466 */
467 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
468 #if defined(DEBUG)
469 printf("WARNING: defaulted mmap() share type to "
470 "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
471 "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
472 p->p_comm);
473 #endif
474 if (vp->v_type == VCHR)
475 flags |= MAP_SHARED; /* for a device */
476 else
477 flags |= MAP_PRIVATE; /* for a file */
478 }
479
480 /*
481 * MAP_PRIVATE device mappings don't make sense (and aren't
482 * supported anyway). However, some programs rely on this,
483 * so just change it to MAP_SHARED.
484 */
485 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
486 flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
487 }
488
489 /*
490 * now check protection
491 */
492
493 maxprot = VM_PROT_EXECUTE;
494
495 /* check read access */
496 if (fp->f_flag & FREAD)
497 maxprot |= VM_PROT_READ;
498 else if (prot & PROT_READ) {
499 error = EACCES;
500 goto out;
501 }
502
503 /* check write access, shared case first */
504 if (flags & MAP_SHARED) {
505 /*
506 * if the file is writable, only add PROT_WRITE to
507 * maxprot if the file is not immutable, append-only.
508 * otherwise, if we have asked for PROT_WRITE, return
509 * EPERM.
510 */
511 if (fp->f_flag & FWRITE) {
512 if ((error =
513 VOP_GETATTR(vp, &va, p->p_ucred, p)))
514 goto out;
515 if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
516 maxprot |= VM_PROT_WRITE;
517 else if (prot & PROT_WRITE) {
518 error = EPERM;
519 goto out;
520 }
521 } else if (prot & PROT_WRITE) {
522 error = EACCES;
523 goto out;
524 }
525 } else {
526 /* MAP_PRIVATE mappings can always write to */
527 maxprot |= VM_PROT_WRITE;
528 }
529
530 /*
531 * set handle to vnode
532 */
533
534 handle = (caddr_t)vp;
535
536 } else { /* MAP_ANON case */
537 /*
538 * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
539 */
540 if (fd != -1) {
541 error = EINVAL;
542 goto out;
543 }
544
545 is_anon: /* label for SunOS style /dev/zero */
546 handle = NULL;
547 maxprot = VM_PROT_ALL;
548 pos = 0;
549 }
550
551 /*
552 * XXX (in)sanity check. We don't do proper datasize checking
553 * XXX for anonymous (or private writable) mmap(). However,
554 * XXX know that if we're trying to allocate more than the amount
555 * XXX remaining under our current data size limit, _that_ should
556 * XXX be disallowed.
557 */
558 if ((flags & MAP_ANON) != 0 ||
559 ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
560 if (size >
561 (p->p_rlimit[RLIMIT_DATA].rlim_cur - ctob(p->p_vmspace->vm_dsize))) {
562 error = ENOMEM;
563 goto out;
564 }
565 }
566
567 /*
568 * now let kernel internal function uvm_mmap do the work.
569 */
570
571 error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
572 flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
573
574 if (error == 0)
575 /* remember to add offset */
576 *retval = (register_t)(addr + pageoff);
577
578 out:
579 if (fp)
580 FRELE(fp);
581 return (error);
582 }
583
584 /*
585 * sys_msync: the msync system call (a front-end for flush)
586 */
587
588 int
sys_msync(p,v,retval)589 sys_msync(p, v, retval)
590 struct proc *p;
591 void *v;
592 register_t *retval;
593 {
594 struct sys_msync_args /* {
595 syscallarg(void *) addr;
596 syscallarg(size_t) len;
597 syscallarg(int) flags;
598 } */ *uap = v;
599 vaddr_t addr;
600 vsize_t size, pageoff;
601 vm_map_t map;
602 int rv, flags, uvmflags;
603
604 /*
605 * extract syscall args from the uap
606 */
607
608 addr = (vaddr_t)SCARG(uap, addr);
609 size = (vsize_t)SCARG(uap, len);
610 flags = SCARG(uap, flags);
611
612 /* sanity check flags */
613 if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
614 (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
615 (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
616 return (EINVAL);
617 if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
618 flags |= MS_SYNC;
619
620 /*
621 * align the address to a page boundary, and adjust the size accordingly
622 */
623
624 pageoff = (addr & PAGE_MASK);
625 addr -= pageoff;
626 size += pageoff;
627 size = (vsize_t) round_page(size);
628
629 /* disallow wrap-around. */
630 if (addr + (ssize_t)size < addr)
631 return (EINVAL);
632
633 /*
634 * get map
635 */
636
637 map = &p->p_vmspace->vm_map;
638
639 /*
640 * XXXCDC: do we really need this semantic?
641 *
642 * XXX Gak! If size is zero we are supposed to sync "all modified
643 * pages with the region containing addr". Unfortunately, we
644 * don't really keep track of individual mmaps so we approximate
645 * by flushing the range of the map entry containing addr.
646 * This can be incorrect if the region splits or is coalesced
647 * with a neighbor.
648 */
649 if (size == 0) {
650 vm_map_entry_t entry;
651
652 vm_map_lock_read(map);
653 rv = uvm_map_lookup_entry(map, addr, &entry);
654 if (rv == TRUE) {
655 addr = entry->start;
656 size = entry->end - entry->start;
657 }
658 vm_map_unlock_read(map);
659 if (rv == FALSE)
660 return (EINVAL);
661 }
662
663 /*
664 * translate MS_ flags into PGO_ flags
665 */
666 uvmflags = PGO_CLEANIT;
667 if (flags & MS_INVALIDATE)
668 uvmflags |= PGO_FREE;
669 if (flags & MS_SYNC)
670 uvmflags |= PGO_SYNCIO;
671 else
672 uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */
673
674 /*
675 * doit!
676 */
677 rv = uvm_map_clean(map, addr, addr+size, uvmflags);
678
679 /*
680 * and return...
681 */
682 return (rv);
683 }
684
685 /*
686 * sys_munmap: unmap a users memory
687 */
688
689 int
sys_munmap(p,v,retval)690 sys_munmap(p, v, retval)
691 struct proc *p;
692 void *v;
693 register_t *retval;
694 {
695 struct sys_munmap_args /* {
696 syscallarg(void *) addr;
697 syscallarg(size_t) len;
698 } */ *uap = v;
699 vaddr_t addr;
700 vsize_t size, pageoff;
701 vm_map_t map;
702 vaddr_t vm_min_address = VM_MIN_ADDRESS;
703 struct vm_map_entry *dead_entries;
704
705 /*
706 * get syscall args...
707 */
708
709 addr = (vaddr_t) SCARG(uap, addr);
710 size = (vsize_t) SCARG(uap, len);
711
712 /*
713 * align the address to a page boundary, and adjust the size accordingly
714 */
715
716 pageoff = (addr & PAGE_MASK);
717 addr -= pageoff;
718 size += pageoff;
719 size = (vsize_t) round_page(size);
720
721 if ((ssize_t)size < 0)
722 return (EINVAL);
723 if (size == 0)
724 return (0);
725
726 /*
727 * Check for illegal addresses. Watch out for address wrap...
728 * Note that VM_*_ADDRESS are not constants due to casts (argh).
729 */
730 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
731 return (EINVAL);
732 if (vm_min_address > 0 && addr < vm_min_address)
733 return (EINVAL);
734 if (addr > addr + size)
735 return (EINVAL);
736 map = &p->p_vmspace->vm_map;
737
738
739 vm_map_lock(map); /* lock map so we can checkprot */
740
741 /*
742 * interesting system call semantic: make sure entire range is
743 * allocated before allowing an unmap.
744 */
745
746 if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
747 vm_map_unlock(map);
748 return (EINVAL);
749 }
750
751 /*
752 * doit!
753 */
754 uvm_unmap_remove(map, addr, addr + size, &dead_entries);
755
756 vm_map_unlock(map); /* and unlock */
757
758 if (dead_entries != NULL)
759 uvm_unmap_detach(dead_entries, 0);
760
761 return (0);
762 }
763
764 /*
765 * sys_mprotect: the mprotect system call
766 */
767
768 int
sys_mprotect(p,v,retval)769 sys_mprotect(p, v, retval)
770 struct proc *p;
771 void *v;
772 register_t *retval;
773 {
774 struct sys_mprotect_args /* {
775 syscallarg(void *) addr;
776 syscallarg(size_t) len;
777 syscallarg(int) prot;
778 } */ *uap = v;
779 vaddr_t addr;
780 vsize_t size, pageoff;
781 vm_prot_t prot;
782 int rv;
783
784 /*
785 * extract syscall args from uap
786 */
787
788 addr = (vaddr_t)SCARG(uap, addr);
789 size = (vsize_t)SCARG(uap, len);
790 prot = SCARG(uap, prot);
791
792 if ((prot & VM_PROT_ALL) != prot)
793 return (EINVAL);
794
795 /*
796 * align the address to a page boundary, and adjust the size accordingly
797 */
798 pageoff = (addr & PAGE_MASK);
799 addr -= pageoff;
800 size += pageoff;
801 size = (vsize_t) round_page(size);
802 if ((ssize_t)size < 0)
803 return (EINVAL);
804
805 /*
806 * doit
807 */
808
809 rv = uvm_map_protect(&p->p_vmspace->vm_map,
810 addr, addr+size, prot, FALSE);
811
812 if (rv == KERN_SUCCESS)
813 return (0);
814 if (rv == KERN_PROTECTION_FAILURE)
815 return (EACCES);
816 return (EINVAL);
817 }
818
819 /*
820 * sys_minherit: the minherit system call
821 */
822
823 int
sys_minherit(p,v,retval)824 sys_minherit(p, v, retval)
825 struct proc *p;
826 void *v;
827 register_t *retval;
828 {
829 struct sys_minherit_args /* {
830 syscallarg(void *) addr;
831 syscallarg(size_t) len;
832 syscallarg(int) inherit;
833 } */ *uap = v;
834 vaddr_t addr;
835 vsize_t size, pageoff;
836 vm_inherit_t inherit;
837
838 addr = (vaddr_t)SCARG(uap, addr);
839 size = (vsize_t)SCARG(uap, len);
840 inherit = SCARG(uap, inherit);
841 /*
842 * align the address to a page boundary, and adjust the size accordingly
843 */
844
845 pageoff = (addr & PAGE_MASK);
846 addr -= pageoff;
847 size += pageoff;
848 size = (vsize_t) round_page(size);
849
850 if ((ssize_t)size < 0)
851 return (EINVAL);
852
853 switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
854 inherit)) {
855 case KERN_SUCCESS:
856 return (0);
857 case KERN_PROTECTION_FAILURE:
858 return (EACCES);
859 }
860 return (EINVAL);
861 }
862
863 /*
864 * sys_madvise: give advice about memory usage.
865 */
866
867 /* ARGSUSED */
868 int
sys_madvise(p,v,retval)869 sys_madvise(p, v, retval)
870 struct proc *p;
871 void *v;
872 register_t *retval;
873 {
874 struct sys_madvise_args /* {
875 syscallarg(void *) addr;
876 syscallarg(size_t) len;
877 syscallarg(int) behav;
878 } */ *uap = v;
879 vaddr_t addr;
880 vsize_t size, pageoff;
881 int advice, rv;
882
883 addr = (vaddr_t)SCARG(uap, addr);
884 size = (vsize_t)SCARG(uap, len);
885 advice = SCARG(uap, behav);
886
887 /*
888 * align the address to a page boundary, and adjust the size accordingly
889 */
890 pageoff = (addr & PAGE_MASK);
891 addr -= pageoff;
892 size += pageoff;
893 size = (vsize_t) round_page(size);
894
895 if ((ssize_t)size <= 0)
896 return (EINVAL);
897
898 switch (advice) {
899 case MADV_NORMAL:
900 case MADV_RANDOM:
901 case MADV_SEQUENTIAL:
902 rv = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
903 advice);
904 break;
905
906 case MADV_WILLNEED:
907 /*
908 * Activate all these pages, pre-faulting them in if
909 * necessary.
910 */
911 /*
912 * XXX IMPLEMENT ME.
913 * Should invent a "weak" mode for uvm_fault()
914 * which would only do the PGO_LOCKED pgo_get().
915 */
916 return (0);
917
918 case MADV_DONTNEED:
919 /*
920 * Deactivate all these pages. We don't need them
921 * any more. We don't, however, toss the data in
922 * the pages.
923 */
924 rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
925 PGO_DEACTIVATE);
926 break;
927
928 case MADV_FREE:
929 /*
930 * These pages contain no valid data, and may be
931 * garbage-collected. Toss all resources, including
932 * any swap space in use.
933 */
934 rv = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
935 PGO_FREE);
936 break;
937
938 case MADV_SPACEAVAIL:
939 /*
940 * XXXMRG What is this? I think it's:
941 *
942 * Ensure that we have allocated backing-store
943 * for these pages.
944 *
945 * This is going to require changes to the page daemon,
946 * as it will free swap space allocated to pages in core.
947 * There's also what to do for device/file/anonymous memory.
948 */
949 return (EINVAL);
950
951 default:
952 return (EINVAL);
953 }
954
955 return (rv);
956 }
957
958 /*
959 * sys_mlock: memory lock
960 */
961
962 int
sys_mlock(p,v,retval)963 sys_mlock(p, v, retval)
964 struct proc *p;
965 void *v;
966 register_t *retval;
967 {
968 struct sys_mlock_args /* {
969 syscallarg(const void *) addr;
970 syscallarg(size_t) len;
971 } */ *uap = v;
972 vaddr_t addr;
973 vsize_t size, pageoff;
974 int error;
975
976 /*
977 * extract syscall args from uap
978 */
979 addr = (vaddr_t)SCARG(uap, addr);
980 size = (vsize_t)SCARG(uap, len);
981
982 /*
983 * align the address to a page boundary and adjust the size accordingly
984 */
985 pageoff = (addr & PAGE_MASK);
986 addr -= pageoff;
987 size += pageoff;
988 size = (vsize_t) round_page(size);
989
990 /* disallow wrap-around. */
991 if (addr + (ssize_t)size < addr)
992 return (EINVAL);
993
994 if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
995 return (EAGAIN);
996
997 #ifdef pmap_wired_count
998 if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
999 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
1000 return (EAGAIN);
1001 #else
1002 if ((error = suser(p, 0)) != 0)
1003 return (error);
1004 #endif
1005
1006 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
1007 0);
1008 return (error == KERN_SUCCESS ? 0 : ENOMEM);
1009 }
1010
1011 /*
1012 * sys_munlock: unlock wired pages
1013 */
1014
1015 int
sys_munlock(p,v,retval)1016 sys_munlock(p, v, retval)
1017 struct proc *p;
1018 void *v;
1019 register_t *retval;
1020 {
1021 struct sys_munlock_args /* {
1022 syscallarg(const void *) addr;
1023 syscallarg(size_t) len;
1024 } */ *uap = v;
1025 vaddr_t addr;
1026 vsize_t size, pageoff;
1027 int error;
1028
1029 /*
1030 * extract syscall args from uap
1031 */
1032
1033 addr = (vaddr_t)SCARG(uap, addr);
1034 size = (vsize_t)SCARG(uap, len);
1035
1036 /*
1037 * align the address to a page boundary, and adjust the size accordingly
1038 */
1039 pageoff = (addr & PAGE_MASK);
1040 addr -= pageoff;
1041 size += pageoff;
1042 size = (vsize_t) round_page(size);
1043
1044 /* disallow wrap-around. */
1045 if (addr + (ssize_t)size < addr)
1046 return (EINVAL);
1047
1048 #ifndef pmap_wired_count
1049 if ((error = suser(p, 0)) != 0)
1050 return (error);
1051 #endif
1052
1053 error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
1054 0);
1055 return (error == KERN_SUCCESS ? 0 : ENOMEM);
1056 }
1057
1058 /*
1059 * sys_mlockall: lock all pages mapped into an address space.
1060 */
1061
1062 int
sys_mlockall(p,v,retval)1063 sys_mlockall(p, v, retval)
1064 struct proc *p;
1065 void *v;
1066 register_t *retval;
1067 {
1068 struct sys_mlockall_args /* {
1069 syscallarg(int) flags;
1070 } */ *uap = v;
1071 int error, flags;
1072
1073 flags = SCARG(uap, flags);
1074
1075 if (flags == 0 ||
1076 (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
1077 return (EINVAL);
1078
1079 #ifndef pmap_wired_count
1080 if ((error = suser(p, 0)) != 0)
1081 return (error);
1082 #endif
1083
1084 error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
1085 p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
1086 switch (error) {
1087 case KERN_SUCCESS:
1088 error = 0;
1089 break;
1090
1091 case KERN_NO_SPACE: /* XXX overloaded */
1092 error = ENOMEM;
1093 break;
1094
1095 default:
1096 /*
1097 * "Some or all of the memory could not be locked when
1098 * the call was made."
1099 */
1100 error = EAGAIN;
1101 }
1102
1103 return (error);
1104 }
1105
1106 /*
1107 * sys_munlockall: unlock all pages mapped into an address space.
1108 */
1109
1110 int
sys_munlockall(p,v,retval)1111 sys_munlockall(p, v, retval)
1112 struct proc *p;
1113 void *v;
1114 register_t *retval;
1115 {
1116
1117 (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
1118 return (0);
1119 }
1120
1121 /*
1122 * uvm_mmap: internal version of mmap
1123 *
1124 * - used by sys_mmap, exec, and sysv shm
1125 * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
1126 * sysv shm uses "named anonymous memory")
1127 * - caller must page-align the file offset
1128 */
1129
1130 int
uvm_mmap(map,addr,size,prot,maxprot,flags,handle,foff,locklimit)1131 uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
1132 vm_map_t map;
1133 vaddr_t *addr;
1134 vsize_t size;
1135 vm_prot_t prot, maxprot;
1136 int flags;
1137 caddr_t handle; /* XXX: VNODE? */
1138 voff_t foff;
1139 vsize_t locklimit;
1140 {
1141 struct uvm_object *uobj;
1142 struct vnode *vp;
1143 int retval;
1144 int advice = UVM_ADV_NORMAL;
1145 uvm_flag_t uvmflag = 0;
1146 vsize_t align = 0; /* userland page size */
1147
1148 /*
1149 * check params
1150 */
1151
1152 if (size == 0)
1153 return(0);
1154 if (foff & PAGE_MASK)
1155 return(EINVAL);
1156 if ((prot & maxprot) != prot)
1157 return(EINVAL);
1158
1159 /*
1160 * for non-fixed mappings, round off the suggested address.
1161 * for fixed mappings, check alignment and zap old mappings.
1162 */
1163
1164 if ((flags & MAP_FIXED) == 0) {
1165 *addr = round_page(*addr); /* round */
1166 } else {
1167 if (*addr & PAGE_MASK)
1168 return(EINVAL);
1169
1170 uvmflag |= UVM_FLAG_FIXED;
1171 if ((flags & __MAP_NOREPLACE) == 0)
1172 uvm_unmap(map, *addr, *addr + size); /* zap! */
1173 }
1174
1175 /*
1176 * handle anon vs. non-anon mappings. for non-anon mappings attach
1177 * to underlying vm object.
1178 */
1179
1180 if (flags & MAP_ANON) {
1181 if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
1182 align = __LDPGSZ;
1183 foff = UVM_UNKNOWN_OFFSET;
1184 uobj = NULL;
1185 if ((flags & MAP_SHARED) == 0)
1186 /* XXX: defer amap create */
1187 uvmflag |= UVM_FLAG_COPYONW;
1188 else
1189 /* shared: create amap now */
1190 uvmflag |= UVM_FLAG_OVERLAY;
1191
1192 } else {
1193
1194 vp = (struct vnode *) handle; /* get vnode */
1195 if (vp->v_type != VCHR) {
1196 uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
1197 maxprot : (maxprot & ~VM_PROT_WRITE));
1198
1199 #ifndef UBC
1200 /*
1201 * XXXCDC: hack from old code
1202 * don't allow vnodes which have been mapped
1203 * shared-writeable to persist [forces them to be
1204 * flushed out when last reference goes].
1205 * XXXCDC: interesting side effect: avoids a bug.
1206 * note that in WRITE [ufs_readwrite.c] that we
1207 * allocate buffer, uncache, and then do the write.
1208 * the problem with this is that if the uncache causes
1209 * VM data to be flushed to the same area of the file
1210 * we are writing to... in that case we've got the
1211 * buffer locked and our process goes to sleep forever.
1212 *
1213 * XXXCDC: checking maxprot protects us from the
1214 * "persistbug" program but this is not a long term
1215 * solution.
1216 *
1217 * XXXCDC: we don't bother calling uncache with the vp
1218 * VOP_LOCKed since we know that we are already
1219 * holding a valid reference to the uvn (from the
1220 * uvn_attach above), and thus it is impossible for
1221 * the uncache to kill the uvn and trigger I/O.
1222 */
1223 if (flags & MAP_SHARED) {
1224 if ((prot & VM_PROT_WRITE) ||
1225 (maxprot & VM_PROT_WRITE)) {
1226 uvm_vnp_uncache(vp);
1227 }
1228 }
1229 #else
1230 /* XXX for now, attach doesn't gain a ref */
1231 VREF(vp);
1232 #endif
1233 } else {
1234 uobj = udv_attach((void *) &vp->v_rdev,
1235 (flags & MAP_SHARED) ? maxprot :
1236 (maxprot & ~VM_PROT_WRITE), foff, size);
1237 /*
1238 * XXX Some devices don't like to be mapped with
1239 * XXX PROT_EXEC, but we don't really have a
1240 * XXX better way of handling this, right now
1241 */
1242 if (uobj == NULL && (prot & PROT_EXEC) == 0) {
1243 maxprot &= ~VM_PROT_EXECUTE;
1244 uobj = udv_attach((void *) &vp->v_rdev,
1245 (flags & MAP_SHARED) ? maxprot :
1246 (maxprot & ~VM_PROT_WRITE), foff, size);
1247 }
1248 advice = UVM_ADV_RANDOM;
1249 }
1250
1251 if (uobj == NULL)
1252 return((vp->v_type == VREG) ? ENOMEM : EINVAL);
1253
1254 if ((flags & MAP_SHARED) == 0)
1255 uvmflag |= UVM_FLAG_COPYONW;
1256 }
1257
1258 /*
1259 * set up mapping flags
1260 */
1261
1262 uvmflag = UVM_MAPFLAG(prot, maxprot,
1263 (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
1264 advice, uvmflag);
1265
1266 /*
1267 * do it!
1268 */
1269
1270 retval = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
1271
1272 if (retval == KERN_SUCCESS) {
1273 /*
1274 * POSIX 1003.1b -- if our address space was configured
1275 * to lock all future mappings, wire the one we just made.
1276 */
1277 if (prot == VM_PROT_NONE) {
1278 /*
1279 * No more work to do in this case.
1280 */
1281 return (0);
1282 }
1283
1284 vm_map_lock(map);
1285
1286 if (map->flags & VM_MAP_WIREFUTURE) {
1287 if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
1288 #ifdef pmap_wired_count
1289 || (locklimit != 0 && (size +
1290 ptoa(pmap_wired_count(vm_map_pmap(map)))) >
1291 locklimit)
1292 #endif
1293 ) {
1294 retval = KERN_RESOURCE_SHORTAGE;
1295 vm_map_unlock(map);
1296 /* unmap the region! */
1297 uvm_unmap(map, *addr, *addr + size);
1298 goto bad;
1299 }
1300 /*
1301 * uvm_map_pageable() always returns the map
1302 * unlocked.
1303 */
1304 retval = uvm_map_pageable(map, *addr, *addr + size,
1305 FALSE, UVM_LK_ENTER);
1306 if (retval != KERN_SUCCESS) {
1307 /* unmap the region! */
1308 uvm_unmap(map, *addr, *addr + size);
1309 goto bad;
1310 }
1311 return (0);
1312 }
1313
1314 vm_map_unlock(map);
1315
1316 return (0);
1317 }
1318
1319 /*
1320 * errors: first detach from the uobj, if any.
1321 */
1322
1323 if (uobj)
1324 uobj->pgops->pgo_detach(uobj);
1325
1326 bad:
1327 return (retval);
1328 }
1329