1 /*	$OpenBSD: pmap.c,v 1.75 2004/02/01 12:26:45 grange Exp $	*/
2 /*	$NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $	*/
3 
4 /*
5  *
6  * Copyright (c) 1997 Charles D. Cranor and Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * pmap.c: i386 pmap module rewrite
38  * Chuck Cranor <chuck@ccrc.wustl.edu>
39  * 11-Aug-97
40  *
41  * history of this pmap module: in addition to my own input, i used
42  *    the following references for this rewrite of the i386 pmap:
43  *
44  * [1] the NetBSD i386 pmap.   this pmap appears to be based on the
45  *     BSD hp300 pmap done by Mike Hibler at University of Utah.
46  *     it was then ported to the i386 by William Jolitz of UUNET
47  *     Technologies, Inc.   Then Charles M. Hannum of the NetBSD
48  *     project fixed some bugs and provided some speed ups.
49  *
50  * [2] the FreeBSD i386 pmap.   this pmap seems to be the
51  *     Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
52  *     and David Greenman.
53  *
54  * [3] the Mach pmap.   this pmap, from CMU, seems to have migrated
55  *     between several processors.   the VAX version was done by
56  *     Avadis Tevanian, Jr., and Michael Wayne Young.    the i386
57  *     version was done by Lance Berc, Mike Kupfer, Bob Baron,
58  *     David Golub, and Richard Draves.    the alpha version was
59  *     done by Alessandro Forin (CMU/Mach) and Chris Demetriou
60  *     (NetBSD/alpha).
61  */
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/proc.h>
66 #include <sys/malloc.h>
67 #include <sys/pool.h>
68 #include <sys/user.h>
69 #include <sys/kernel.h>
70 
71 #include <uvm/uvm.h>
72 
73 #include <machine/cpu.h>
74 #include <machine/specialreg.h>
75 #include <machine/gdt.h>
76 
77 #include <dev/isa/isareg.h>
78 #ifdef __NetBSD__
79 #include <machine/isa_machdep.h>
80 #endif
81 #ifdef __OpenBSD__
82 #include <sys/msgbuf.h>
83 #include <stand/boot/bootarg.h>
84 #endif
85 
86 /*
87  * general info:
88  *
89  *  - for an explanation of how the i386 MMU hardware works see
90  *    the comments in <machine/pte.h>.
91  *
92  *  - for an explanation of the general memory structure used by
93  *    this pmap (including the recursive mapping), see the comments
94  *    in <machine/pmap.h>.
95  *
96  * this file contains the code for the "pmap module."   the module's
97  * job is to manage the hardware's virtual to physical address mappings.
98  * note that there are two levels of mapping in the VM system:
99  *
100  *  [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
101  *      to map ranges of virtual address space to objects/files.  for
102  *      example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
103  *      to the file /bin/ls starting at offset zero."   note that
104  *      the upper layer mapping is not concerned with how individual
105  *      vm_pages are mapped.
106  *
 *  [2] the lower layer of the VM system (the pmap) maintains the mappings
 *      from virtual addresses to physical addresses.   it is concerned
 *      with which vm_page is mapped where.   for example, when you run
 *      /bin/ls and start at page 0x1000 the fault routine may lookup the
 *      correct page of the /bin/ls file and then ask the pmap layer to
 *      establish a mapping for it.
113  *
114  * note that information in the lower layer of the VM system can be
115  * thrown away since it can easily be reconstructed from the info
116  * in the upper layer.
117  *
118  * data structures we use include:
119  *
120  *  - struct pmap: describes the address space of one thread
121  *  - struct pv_entry: describes one <PMAP,VA> mapping of a PA
122  *  - struct pv_head: there is one pv_head per managed page of
123  *	physical memory.   the pv_head points to a list of pv_entry
124  *	structures which describe all the <PMAP,VA> pairs that this
125  *      page is mapped in.    this is critical for page based operations
126  *      such as pmap_page_protect() [change protection on _all_ mappings
127  *      of a page]
 *  - pv_page/pv_page_info: pv_entry's are allocated out of pv_page's.
 *      if we run out of pv_entry's we allocate a new pv_page and put
 *      its pv_entry's on the free list.
 *  - pmap_remove_record: a list of virtual addresses whose mappings
 *	have been changed.   used for TLB flushing.
133  */
134 
135 /*
136  * memory allocation
137  *
138  *  - there are three data structures that we must dynamically allocate:
139  *
140  * [A] new process' page directory page (PDP)
141  *	- plan 1: done at pmap_create() we use
142  *	  uvm_km_alloc(kernel_map, PAGE_SIZE)  [fka kmem_alloc] to do this
143  *	  allocation.
144  *
145  * if we are low in free physical memory then we sleep in
146  * uvm_km_alloc -- in this case this is ok since we are creating
147  * a new pmap and should not be holding any locks.
148  *
149  * if the kernel is totally out of virtual space
150  * (i.e. uvm_km_alloc returns NULL), then we panic.
151  *
152  * XXX: the fork code currently has no way to return an "out of
153  * memory, try again" error code since uvm_fork [fka vm_fork]
154  * is a void function.
155  *
156  * [B] new page tables pages (PTP)
157  * 	call uvm_pagealloc()
158  * 		=> success: zero page, add to pm_pdir
159  * 		=> failure: we are out of free vm_pages, let pmap_enter()
160  *		   tell UVM about it.
161  *
162  * note: for kernel PTPs, we start with NKPTP of them.   as we map
163  * kernel memory (at uvm_map time) we check to see if we've grown
164  * the kernel pmap.   if so, we call the optional function
165  * pmap_growkernel() to grow the kernel PTPs in advance.
166  *
167  * [C] pv_entry structures
168  *	- plan 1: try to allocate one off the free list
169  *		=> success: done!
170  *		=> failure: no more free pv_entrys on the list
171  *	- plan 2: try to allocate a new pv_page to add a chunk of
172  *	pv_entrys to the free list
173  *		[a] obtain a free, unmapped, VA in kmem_map.  either
174  *		we have one saved from a previous call, or we allocate
175  *		one now using a "vm_map_lock_try" in uvm_map
176  *		=> success: we have an unmapped VA, continue to [b]
177  *		=> failure: unable to lock kmem_map or out of VA in it.
178  *			move on to plan 3.
179  *		[b] allocate a page in kmem_object for the VA
180  *		=> success: map it in, free the pv_entry's, DONE!
181  *		=> failure: kmem_object locked, no free vm_pages, etc.
182  *			save VA for later call to [a], go to plan 3.
183  *	If we fail, we simply let pmap_enter() tell UVM about it.
184  */
185 
186 /*
187  * locking
188  *
189  * we have the following locks that we must contend with:
190  *
191  * "normal" locks:
192  *
193  *  - pmap_main_lock
194  *    this lock is used to prevent deadlock and/or provide mutex
195  *    access to the pmap system.   most operations lock the pmap
196  *    structure first, then they lock the pv_lists (if needed).
197  *    however, some operations such as pmap_page_protect lock
198  *    the pv_lists and then lock pmaps.   in order to prevent a
199  *    cycle, we require a mutex lock when locking the pv_lists
 *    first.   thus, the "pmap => pv_list" lockers must gain a
201  *    read-lock on pmap_main_lock before locking the pmap.   and
202  *    the "pv_list => pmap" lockers must gain a write-lock on
203  *    pmap_main_lock before locking.    since only one thread
204  *    can write-lock a lock at a time, this provides mutex.
205  *
206  * "simple" locks:
207  *
208  * - pmap lock (per pmap, part of uvm_object)
209  *   this lock protects the fields in the pmap structure including
210  *   the non-kernel PDEs in the PDP, and the PTEs.  it also locks
211  *   in the alternate PTE space (since that is determined by the
212  *   entry in the PDP).
213  *
214  * - pvh_lock (per pv_head)
215  *   this lock protects the pv_entry list which is chained off the
216  *   pv_head structure for a specific managed PA.   it is locked
217  *   when traversing the list (e.g. adding/removing mappings,
218  *   syncing R/M bits, etc.)
219  *
220  * - pvalloc_lock
221  *   this lock protects the data structures which are used to manage
222  *   the free list of pv_entry structures.
223  *
224  * - pmaps_lock
225  *   this lock protects the list of active pmaps (headed by "pmaps").
226  *   we lock it when adding or removing pmaps from this list.
227  *
228  * - pmap_copy_page_lock
 *   locks the tmp kernel PTE mappings we use to copy data
230  *
231  * - pmap_zero_page_lock
232  *   locks the tmp kernel PTE mapping we use to zero a page
233  *
234  * - pmap_tmpptp_lock
235  *   locks the tmp kernel PTE mapping we use to look at a PTP
236  *   in another process
237  *
238  * XXX: would be nice to have per-CPU VAs for the above 4
239  */
240 
241 /*
242  * locking data structures
243  */
244 
245 #ifdef __OpenBSD__
246 /* XXX */
247 #define spinlockinit(lock, name, flags) /* nada */
248 #define spinlockmgr(lock, flags, slock) /* nada */
249 #endif
250 
251 struct lock pmap_main_lock;
252 struct simplelock pvalloc_lock;
253 struct simplelock pmaps_lock;
254 struct simplelock pmap_copy_page_lock;
255 struct simplelock pmap_zero_page_lock;
256 struct simplelock pmap_tmpptp_lock;
257 
258 #define PMAP_MAP_TO_HEAD_LOCK() \
259      spinlockmgr(&pmap_main_lock, LK_SHARED, (void *) 0)
260 #define PMAP_MAP_TO_HEAD_UNLOCK() \
261      spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
262 
263 #define PMAP_HEAD_TO_MAP_LOCK() \
264      spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, (void *) 0)
265 #define PMAP_HEAD_TO_MAP_UNLOCK() \
266      spinlockmgr(&pmap_main_lock, LK_RELEASE, (void *) 0)
267 
268 /*
269  * global data structures
270  */
271 
272 struct pmap kernel_pmap_store;	/* the kernel's pmap (proc0) */
273 
274 /*
275  * nkpde is the number of kernel PTPs allocated for the kernel at
276  * boot time (NKPTP is a compile time override).   this number can
277  * grow dynamically as needed (but once allocated, we never free
278  * kernel PTPs).
279  */
280 
281 int nkpde = NKPTP;
282 #ifdef NKPDE
283 #error "obsolete NKPDE: use NKPTP"
284 #endif
285 
286 /*
287  * pmap_pg_g: if our processor supports PG_G in the PTE then we
288  * set pmap_pg_g to PG_G (otherwise it is zero).
289  */
290 
291 int pmap_pg_g = 0;
292 
293 /*
294  * i386 physical memory comes in a big contig chunk with a small
295  * hole toward the front of it...  the following 4 paddr_t's
296  * (shared with machdep.c) describe the physical address space
297  * of this machine.
298  */
299 paddr_t avail_start;	/* PA of first available physical page */
300 paddr_t avail_end;	/* PA of last available physical page */
301 paddr_t hole_start;	/* PA of start of "hole" */
302 paddr_t hole_end;	/* PA of end of "hole" */
303 
304 /*
305  * other data structures
306  */
307 
308 static pt_entry_t protection_codes[8];     /* maps MI prot to i386 prot code */
309 static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */
310 
311 /*
312  * the following two vaddr_t's are used during system startup
313  * to keep track of how much of the kernel's VM space we have used.
314  * once the system is started, the management of the remaining kernel
315  * VM space is turned over to the kernel_map vm_map.
316  */
317 
318 static vaddr_t virtual_avail;	/* VA of first free KVA */
319 static vaddr_t virtual_end;	/* VA of last free KVA */
320 
321 
322 /*
323  * pv_page management structures: locked by pvalloc_lock
324  */
325 
326 TAILQ_HEAD(pv_pagelist, pv_page);
327 static struct pv_pagelist pv_freepages;	/* list of pv_pages with free entrys */
328 static struct pv_pagelist pv_unusedpgs; /* list of unused pv_pages */
329 static int pv_nfpvents;			/* # of free pv entries */
330 static struct pv_page *pv_initpage;	/* bootstrap page from kernel_map */
331 static vaddr_t pv_cachedva;		/* cached VA for later use */
332 
333 #define PVE_LOWAT (PVE_PER_PVPAGE / 2)	/* free pv_entry low water mark */
334 #define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2))
335 					/* high water mark */
336 
337 /*
338  * linked list of all non-kernel pmaps
339  */
340 
341 static struct pmap_head pmaps;
342 static struct pmap *pmaps_hand = NULL;	/* used by pmap_steal_ptp */
343 
344 /*
345  * pool that pmap structures are allocated from
346  */
347 
348 struct pool pmap_pmap_pool;
349 
350 /*
351  * special VAs and the PTEs that map them
352  */
353 
354 static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte;
355 static caddr_t csrcp, cdstp, zerop, ptpp;
356 caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */
357 
358 #ifdef __NetBSD__
359 extern vaddr_t msgbuf_vaddr;
360 extern paddr_t msgbuf_paddr;
361 
362 extern vaddr_t idt_vaddr;			/* we allocate IDT early */
363 extern paddr_t idt_paddr;
364 #endif
365 
366 #if defined(I586_CPU)
367 /* stuff to fix the pentium f00f bug */
368 extern vaddr_t pentium_idt_vaddr;
369 #endif
370 
371 
372 /*
373  * local prototypes
374  */
375 
376 static struct pv_entry	*pmap_add_pvpage(struct pv_page *, boolean_t);
377 static struct vm_page	*pmap_alloc_ptp(struct pmap *, int, boolean_t);
378 static struct pv_entry	*pmap_alloc_pv(struct pmap *, int); /* see codes below */
379 #define ALLOCPV_NEED	0	/* need PV now */
380 #define ALLOCPV_TRY	1	/* just try to allocate, don't steal */
381 #define ALLOCPV_NONEED	2	/* don't need PV, just growing cache */
382 static struct pv_entry	*pmap_alloc_pvpage(struct pmap *, int);
383 static void		 pmap_enter_pv(struct pv_head *,
384 					    struct pv_entry *, struct pmap *,
385 					    vaddr_t, struct vm_page *);
386 static void		 pmap_free_pv(struct pmap *, struct pv_entry *);
387 static void		 pmap_free_pvs(struct pmap *, struct pv_entry *);
388 static void		 pmap_free_pv_doit(struct pv_entry *);
389 static void		 pmap_free_pvpage(void);
390 static struct vm_page	*pmap_get_ptp(struct pmap *, int, boolean_t);
391 static boolean_t	 pmap_is_curpmap(struct pmap *);
392 static pt_entry_t	*pmap_map_ptes(struct pmap *);
393 static struct pv_entry	*pmap_remove_pv(struct pv_head *, struct pmap *,
394 					     vaddr_t);
395 static boolean_t	 pmap_remove_pte(struct pmap *, struct vm_page *,
396 					      pt_entry_t *, vaddr_t);
397 static void		 pmap_remove_ptes(struct pmap *,
398 					       struct pmap_remove_record *,
399 					       struct vm_page *, vaddr_t,
400 					       vaddr_t, vaddr_t);
401 static struct vm_page	*pmap_steal_ptp(struct uvm_object *,
402 					     vaddr_t);
403 static vaddr_t		 pmap_tmpmap_pa(paddr_t);
404 static pt_entry_t	*pmap_tmpmap_pvepte(struct pv_entry *);
405 static void		 pmap_tmpunmap_pa(void);
406 static void		 pmap_tmpunmap_pvepte(struct pv_entry *);
407 static boolean_t	 pmap_try_steal_pv(struct pv_head *,
408 						struct pv_entry *,
409 						struct pv_entry *);
410 static void		pmap_unmap_ptes(struct pmap *);
411 static void		pmap_exec_account(struct pmap *, vaddr_t,
412 						pt_entry_t, pt_entry_t);
413 
414 void			pmap_pinit(pmap_t);
415 void			pmap_release(pmap_t);
416 
417 void			pmap_zero_phys(paddr_t);
418 
419 /*
420  * p m a p   i n l i n e   h e l p e r   f u n c t i o n s
421  */
422 
423 /*
424  * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
425  *		of course the kernel is always loaded
426  */
427 
428 boolean_t
pmap_is_curpmap(pmap)429 pmap_is_curpmap(pmap)
430 	struct pmap *pmap;
431 {
432 	return((pmap == pmap_kernel()) ||
433 	       (pmap->pm_pdirpa == (paddr_t) rcr3()));
434 }
435 
436 /*
437  * pmap_tmpmap_pa: map a page in for tmp usage
438  *
439  * => returns with pmap_tmpptp_lock held
440  */
441 
442 vaddr_t
pmap_tmpmap_pa(pa)443 pmap_tmpmap_pa(pa)
444 	paddr_t pa;
445 {
446 	simple_lock(&pmap_tmpptp_lock);
447 #if defined(DIAGNOSTIC)
448 	if (*ptp_pte)
449 		panic("pmap_tmpmap_pa: ptp_pte in use?");
450 #endif
451 	*ptp_pte = PG_V | PG_RW | pa;		/* always a new mapping */
452 	return((vaddr_t)ptpp);
453 }
454 
455 /*
456  * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa)
457  *
458  * => we release pmap_tmpptp_lock
459  */
460 
461 void
pmap_tmpunmap_pa()462 pmap_tmpunmap_pa()
463 {
464 #if defined(DIAGNOSTIC)
465 	if (!pmap_valid_entry(*ptp_pte))
466 		panic("pmap_tmpunmap_pa: our pte invalid?");
467 #endif
468 	*ptp_pte = 0;		/* zap! */
469 	pmap_update_pg((vaddr_t)ptpp);
470 	simple_unlock(&pmap_tmpptp_lock);
471 }
472 
473 /*
474  * pmap_tmpmap_pvepte: get a quick mapping of a PTE for a pv_entry
475  *
476  * => do NOT use this on kernel mappings [why?  because pv_ptp may be NULL]
477  * => we may grab pmap_tmpptp_lock and return with it held
478  */
479 
480 pt_entry_t *
pmap_tmpmap_pvepte(pve)481 pmap_tmpmap_pvepte(pve)
482 	struct pv_entry *pve;
483 {
484 #ifdef DIAGNOSTIC
485 	if (pve->pv_pmap == pmap_kernel())
486 		panic("pmap_tmpmap_pvepte: attempt to map kernel");
487 #endif
488 
489 	/* is it current pmap?  use direct mapping... */
490 	if (pmap_is_curpmap(pve->pv_pmap))
491 		return(vtopte(pve->pv_va));
492 
493 	return(((pt_entry_t *)pmap_tmpmap_pa(VM_PAGE_TO_PHYS(pve->pv_ptp)))
494 	       + ptei((unsigned)pve->pv_va));
495 }
496 
497 /*
498  * pmap_tmpunmap_pvepte: release a mapping obtained with pmap_tmpmap_pvepte
499  *
500  * => we will release pmap_tmpptp_lock if we hold it
501  */
502 
503 void
pmap_tmpunmap_pvepte(pve)504 pmap_tmpunmap_pvepte(pve)
505 	struct pv_entry *pve;
506 {
507 	/* was it current pmap?   if so, return */
508 	if (pmap_is_curpmap(pve->pv_pmap))
509 		return;
510 
511 	pmap_tmpunmap_pa();
512 }
513 
514 /*
515  * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
516  *
517  * => we lock enough pmaps to keep things locked in
518  * => must be undone with pmap_unmap_ptes before returning
519  */
520 
521 pt_entry_t *
pmap_map_ptes(pmap)522 pmap_map_ptes(pmap)
523 	struct pmap *pmap;
524 {
525 	pd_entry_t opde;
526 
527 	/* the kernel's pmap is always accessible */
528 	if (pmap == pmap_kernel()) {
529 		return(PTE_BASE);
530 	}
531 
532 	/* if curpmap then we are always mapped */
533 	if (pmap_is_curpmap(pmap)) {
534 		simple_lock(&pmap->pm_obj.vmobjlock);
535 		return(PTE_BASE);
536 	}
537 
538 	/* need to lock both curpmap and pmap: use ordered locking */
539 	if ((unsigned) pmap < (unsigned) curpcb->pcb_pmap) {
540 		simple_lock(&pmap->pm_obj.vmobjlock);
541 		simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
542 	} else {
543 		simple_lock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
544 		simple_lock(&pmap->pm_obj.vmobjlock);
545 	}
546 
547 	/* need to load a new alternate pt space into curpmap? */
548 	opde = *APDP_PDE;
549 	if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdirpa) {
550 		*APDP_PDE = (pd_entry_t) (pmap->pm_pdirpa | PG_RW | PG_V);
551 		if (pmap_valid_entry(opde))
552 			tlbflush();
553 	}
554 	return(APTE_BASE);
555 }
556 
557 /*
558  * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
559  */
560 
561 void
pmap_unmap_ptes(pmap)562 pmap_unmap_ptes(pmap)
563 	struct pmap *pmap;
564 {
565 	if (pmap == pmap_kernel()) {
566 		return;
567 	}
568 	if (pmap_is_curpmap(pmap)) {
569 		simple_unlock(&pmap->pm_obj.vmobjlock);
570 	} else {
571 		simple_unlock(&pmap->pm_obj.vmobjlock);
572 		simple_unlock(&curpcb->pcb_pmap->pm_obj.vmobjlock);
573 	}
574 }
575 
576 void
pmap_exec_account(struct pmap * pm,vaddr_t va,pt_entry_t opte,pt_entry_t npte)577 pmap_exec_account(struct pmap *pm, vaddr_t va,
578     pt_entry_t opte, pt_entry_t npte)
579 {
580 	if (curproc == NULL || curproc->p_vmspace == NULL ||
581 	    pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
582 		return;
583 
584 	if ((opte ^ npte) & PG_X)
585 		pmap_update_pg(va);
586 
587 	/*
588 	 * Executability was removed on the last executable change.
589 	 * Reset the code segment to something conservative and
590 	 * let the trap handler deal with setting the right limit.
591 	 * We can't do that because of locking constraints on the vm map.
592 	 *
593 	 * XXX - floating cs - set this _really_ low.
594 	 */
595 	if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
596 		struct trapframe *tf = curproc->p_md.md_regs;
597 		struct pcb *pcb = &curproc->p_addr->u_pcb;
598 
599 		pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
600 		pm->pm_hiexec = I386_MAX_EXE_ADDR;
601 	}
602 }
603 
604 /*
605  * Fixup the code segment to cover all potential executable mappings.
606  * returns 0 if no changes to the code segment were made.
607  */
608 int
pmap_exec_fixup(struct vm_map * map,struct trapframe * tf,struct pcb * pcb)609 pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
610 {
611 	struct vm_map_entry *ent;
612 	struct pmap *pm = vm_map_pmap(map);
613 	vaddr_t va = 0;
614 
615 	vm_map_lock(map);
616 	for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
617 		/*
618 		 * This entry has greater va than the entries before.
619 		 * We need to make it point to the last page, not past it.
620 		 */
621 		if (ent->protection & VM_PROT_EXECUTE)
622 			va = trunc_page(ent->end) - PAGE_SIZE;
623 	}
624 	vm_map_unlock(map);
625 
626 	if (va == pm->pm_hiexec)
627 		return (0);
628 
629 	pm->pm_hiexec = va;
630 
631 	if (pm->pm_hiexec > (vaddr_t)I386_MAX_EXE_ADDR) {
632 		pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE1_SEL, SEL_UPL);
633 	} else {
634 		pcb->pcb_cs = tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
635 	}
636 
637 	return (1);
638 }
639 
640 /*
641  * p m a p   k e n t e r   f u n c t i o n s
642  *
643  * functions to quickly enter/remove pages from the kernel address
644  * space.   pmap_kremove is exported to MI kernel.  we make use of
645  * the recursive PTE mappings.
646  */
647 
648 /*
649  * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
650  *
651  * => no need to lock anything, assume va is already allocated
652  * => should be faster than normal pmap enter function
653  */
654 
655 void
pmap_kenter_pa(va,pa,prot)656 pmap_kenter_pa(va, pa, prot)
657 	vaddr_t va;
658 	paddr_t pa;
659 	vm_prot_t prot;
660 {
661 	pt_entry_t *pte, opte;
662 
663 	pte = vtopte(va);
664 	opte = *pte;
665 	*pte = pa | ((prot & VM_PROT_WRITE)? PG_RW : PG_RO) |
666 		PG_V | pmap_pg_g;	/* zap! */
667 	if (pmap_valid_entry(opte))
668 		pmap_update_pg(va);
669 }
670 
671 /*
672  * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
673  *
674  * => no need to lock anything
675  * => caller must dispose of any vm_page mapped in the va range
676  * => note: not an inline function
677  * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
678  * => we assume kernel only unmaps valid addresses and thus don't bother
679  *    checking the valid bit before doing TLB flushing
680  */
681 
682 void
pmap_kremove(va,len)683 pmap_kremove(va, len)
684 	vaddr_t va;
685 	vsize_t len;
686 {
687 	pt_entry_t *pte;
688 
689 	len >>= PAGE_SHIFT;
690 	for ( /* null */ ; len ; len--, va += NBPG) {
691 		pte = vtopte(va);
692 #ifdef DIAGNOSTIC
693 		if (*pte & PG_PVLIST)
694 			panic("pmap_kremove: PG_PVLIST mapping for 0x%lx",
695 			      va);
696 #endif
697 		*pte = 0;		/* zap! */
698 #if defined(I386_CPU)
699 		if (cpu_class != CPUCLASS_386)
700 #endif
701 			pmap_update_pg(va);
702 	}
703 #if defined(I386_CPU)
704 	if (cpu_class == CPUCLASS_386)
705 		tlbflush();
706 #endif
707 }
708 
709 /*
710  * p m a p   i n i t   f u n c t i o n s
711  *
712  * pmap_bootstrap and pmap_init are called during system startup
713  * to init the pmap module.   pmap_bootstrap() does a low level
714  * init just to get things rolling.   pmap_init() finishes the job.
715  */
716 
717 /*
718  * pmap_bootstrap: get the system in a state where it can run with VM
719  *	properly enabled (called before main()).   the VM system is
720  *      fully init'd later...
721  *
722  * => on i386, locore.s has already enabled the MMU by allocating
723  *	a PDP for the kernel, and nkpde PTP's for the kernel.
724  * => kva_start is the first free virtual address in kernel space
725  */
726 
727 void
pmap_bootstrap(kva_start)728 pmap_bootstrap(kva_start)
729 	vaddr_t kva_start;
730 {
731 	struct pmap *kpm;
732 	vaddr_t kva;
733 	pt_entry_t *pte;
734 
735 	/*
736 	 * set the page size (default value is 4K which is ok)
737 	 */
738 
739 	uvm_setpagesize();
740 
741 	/*
742 	 * a quick sanity check
743 	 */
744 
745 	if (PAGE_SIZE != NBPG)
746 		panic("pmap_bootstrap: PAGE_SIZE != NBPG");
747 
748 	/*
749 	 * use the very last page of physical memory for the message buffer
750 	 */
751 
752 	avail_end -= i386_round_page(MSGBUFSIZE);
753 	/*
754 	 * The arguments passed in from /boot needs space too.
755 	 */
756 	avail_end -= i386_round_page(bootargc);
757 
758 	/*
759 	 * set up our local static global vars that keep track of the
760 	 * usage of KVM before kernel_map is set up
761 	 */
762 
763 	virtual_avail = kva_start;		/* first free KVA */
764 	virtual_end = VM_MAX_KERNEL_ADDRESS;	/* last KVA */
765 
766 	/*
767 	 * set up protection_codes: we need to be able to convert from
768 	 * a MI protection code (some combo of VM_PROT...) to something
769 	 * we can jam into a i386 PTE.
770 	 */
771 
772 	protection_codes[UVM_PROT_NONE] = 0;  			/* --- */
773 	protection_codes[UVM_PROT_EXEC] = PG_X;			/* --x */
774 	protection_codes[UVM_PROT_READ] = PG_RO;		/* -r- */
775 	protection_codes[UVM_PROT_RX] = PG_X;			/* -rx */
776 	protection_codes[UVM_PROT_WRITE] = PG_RW;		/* w-- */
777 	protection_codes[UVM_PROT_WX] = PG_RW|PG_X;		/* w-x */
778 	protection_codes[UVM_PROT_RW] = PG_RW;			/* wr- */
779 	protection_codes[UVM_PROT_RWX] = PG_RW|PG_X;		/* wrx */
780 
781 	/*
782 	 * now we init the kernel's pmap
783 	 *
784 	 * the kernel pmap's pm_obj is not used for much.   however, in
785 	 * user pmaps the pm_obj contains the list of active PTPs.
786 	 * the pm_obj currently does not have a pager.   it might be possible
787 	 * to add a pager that would allow a process to read-only mmap its
788 	 * own page tables (fast user level vtophys?).   this may or may not
789 	 * be useful.
790 	 */
791 
792 	kpm = pmap_kernel();
793 	simple_lock_init(&kpm->pm_obj.vmobjlock);
794 	kpm->pm_obj.pgops = NULL;
795 	TAILQ_INIT(&kpm->pm_obj.memq);
796 	kpm->pm_obj.uo_npages = 0;
797 	kpm->pm_obj.uo_refs = 1;
798 	bzero(&kpm->pm_list, sizeof(kpm->pm_list));  /* pm_list not used */
799 	kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
800 	kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3;
801 	kpm->pm_stats.wired_count = kpm->pm_stats.resident_count =
802 		i386_btop(kva_start - VM_MIN_KERNEL_ADDRESS);
803 
804 	/*
805 	 * the above is just a rough estimate and not critical to the proper
806 	 * operation of the system.
807 	 */
808 
809 	curpcb->pcb_pmap = kpm;	/* proc0's pcb */
810 
811 	/*
812 	 * enable global TLB entries if they are supported
813 	 */
814 
815 	if (cpu_feature & CPUID_PGE) {
816 		lcr4(rcr4() | CR4_PGE);	/* enable hardware (via %cr4) */
817 		pmap_pg_g = PG_G;		/* enable software */
818 
819 		/* add PG_G attribute to already mapped kernel pages */
820 		for (kva = VM_MIN_KERNEL_ADDRESS ; kva < virtual_avail ;
821 		     kva += PAGE_SIZE)
822 			if (pmap_valid_entry(PTE_BASE[i386_btop(kva)]))
823 				PTE_BASE[i386_btop(kva)] |= PG_G;
824 	}
825 
826 	/*
827 	 * now we allocate the "special" VAs which are used for tmp mappings
828 	 * by the pmap (and other modules).    we allocate the VAs by advancing
829 	 * virtual_avail (note that there are no pages mapped at these VAs).
830 	 * we find the PTE that maps the allocated VA via the linear PTE
831 	 * mapping.
832 	 */
833 
834 	pte = PTE_BASE + i386_btop(virtual_avail);
835 
836 	csrcp = (caddr_t) virtual_avail;  csrc_pte = pte;	/* allocate */
837 	virtual_avail += PAGE_SIZE; pte++;			/* advance */
838 
839 	cdstp = (caddr_t) virtual_avail;  cdst_pte = pte;
840 	virtual_avail += PAGE_SIZE; pte++;
841 
842 	zerop = (caddr_t) virtual_avail;  zero_pte = pte;
843 	virtual_avail += PAGE_SIZE; pte++;
844 
845 	ptpp = (caddr_t) virtual_avail;  ptp_pte = pte;
846 	virtual_avail += PAGE_SIZE; pte++;
847 
848 	/* XXX: vmmap used by mem.c... should be uvm_map_reserve */
849 	vmmap = (char *)virtual_avail;			/* don't need pte */
850 	virtual_avail += PAGE_SIZE; pte++;
851 
852 #ifdef __NetBSD
853 	msgbuf_vaddr = virtual_avail;			/* don't need pte */
854 #endif
855 #ifdef __OpenBSD__
856 	msgbufp = (struct msgbuf *)virtual_avail;	/* don't need pte */
857 #endif
858 	virtual_avail += round_page(MSGBUFSIZE); pte++;
859 
860 #ifdef __NetBSD__
861 	idt_vaddr = virtual_avail;			/* don't need pte */
862 	virtual_avail += PAGE_SIZE; pte++;
863 	idt_paddr = avail_start;			/* steal a page */
864 	avail_start += PAGE_SIZE;
865 
866 #if defined(I586_CPU)
867 	/* pentium f00f bug stuff */
868 	pentium_idt_vaddr = virtual_avail;		/* don't need pte */
869 	virtual_avail += PAGE_SIZE; pte++;
870 #endif
871 #endif
872 
873 #ifdef __OpenBSD__
874 	bootargp = (bootarg_t *)virtual_avail;
875 	virtual_avail += round_page(bootargc); pte++;
876 #endif
877 
878 	/*
879 	 * now we reserve some VM for mapping pages when doing a crash dump
880 	 */
881 
882 	virtual_avail = reserve_dumppages(virtual_avail);
883 
884 	/*
885 	 * init the static-global locks and global lists.
886 	 */
887 
888 	spinlockinit(&pmap_main_lock, "pmaplk", 0);
889 	simple_lock_init(&pvalloc_lock);
890 	simple_lock_init(&pmaps_lock);
891 	simple_lock_init(&pmap_copy_page_lock);
892 	simple_lock_init(&pmap_zero_page_lock);
893 	simple_lock_init(&pmap_tmpptp_lock);
894 	LIST_INIT(&pmaps);
895 	TAILQ_INIT(&pv_freepages);
896 	TAILQ_INIT(&pv_unusedpgs);
897 
898 	/*
899 	 * initialize the pmap pool.
900 	 */
901 
902 	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
903 	    &pool_allocator_nointr);
904 
905 #ifdef __NetBSD__
906 	/*
907 	 * we must call uvm_page_physload() after we are done playing with
908 	 * virtual_avail but before we call pmap_steal_memory.  [i.e. here]
909 	 * this call tells the VM system how much physical memory it
910 	 * controls.  If we have 16M of RAM or less, just put it all on
911 	 * the default free list.  Otherwise, put the first 16M of RAM
912 	 * on a lower priority free list (so that all of the ISA DMA'able
913 	 * memory won't be eaten up first-off).
914 	 */
915 
916 	if (avail_end <= (16 * 1024 * 1024))
917 		first16q = VM_FREELIST_DEFAULT;
918 	else
919 		first16q = VM_FREELIST_FIRST16;
920 
921 	if (avail_start < hole_start)   /* any free memory before the hole? */
922 		uvm_page_physload(atop(avail_start), atop(hole_start),
923 				  atop(avail_start), atop(hole_start),
924 				  first16q);
925 
926 	if (first16q != VM_FREELIST_DEFAULT &&
927 	    hole_end < 16 * 1024 * 1024) {
928 		uvm_page_physload(atop(hole_end), atop(16 * 1024 * 1024),
929 				  atop(hole_end), atop(16 * 1024 * 1024),
930 				  first16q);
931 		uvm_page_physload(atop(16 * 1024 * 1024), atop(avail_end),
932 				  atop(16 * 1024 * 1024), atop(avail_end),
933 				  VM_FREELIST_DEFAULT);
934 	} else {
935 		uvm_page_physload(atop(hole_end), atop(avail_end),
936 				  atop(hole_end), atop(avail_end),
937 				  VM_FREELIST_DEFAULT);
938 	}
939 #endif
940 
941 	/*
942 	 * ensure the TLB is sync'd with reality by flushing it...
943 	 */
944 
945 	tlbflush();
946 }
947 
948 /*
949  * pmap_init: called from uvm_init, our job is to get the pmap
950  * system ready to manage mappings... this mainly means initing
951  * the pv_entry stuff.
952  */
953 
954 void
pmap_init()955 pmap_init()
956 {
957 	int npages, lcv, i;
958 	vaddr_t addr;
959 	vsize_t s;
960 
961 	/*
962 	 * compute the number of pages we have and then allocate RAM
963 	 * for each pages' pv_head and saved attributes.
964 	 */
965 
966 	npages = 0;
967 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
968 		npages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
969 	s = (vsize_t) (sizeof(struct pv_head) * npages +
970 		       sizeof(char) * npages);
971 	s = round_page(s); /* round up */
972 	addr = (vaddr_t) uvm_km_zalloc(kernel_map, s);
973 	if (addr == 0)
974 		panic("pmap_init: unable to allocate pv_heads");
975 
976 	/*
977 	 * init all pv_head's and attrs in one bzero
978 	 */
979 
980 	/* allocate pv_head stuff first */
981 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
982 		vm_physmem[lcv].pmseg.pvhead = (struct pv_head *) addr;
983 		addr = (vaddr_t)(vm_physmem[lcv].pmseg.pvhead +
984 				 (vm_physmem[lcv].end - vm_physmem[lcv].start));
985 		for (i = 0;
986 		     i < (vm_physmem[lcv].end - vm_physmem[lcv].start); i++) {
987 			simple_lock_init(
988 			    &vm_physmem[lcv].pmseg.pvhead[i].pvh_lock);
989 		}
990 	}
991 
992 	/* now allocate attrs */
993 	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
994 		vm_physmem[lcv].pmseg.attrs = (char *) addr;
995 		addr = (vaddr_t)(vm_physmem[lcv].pmseg.attrs +
996 				 (vm_physmem[lcv].end - vm_physmem[lcv].start));
997 	}
998 
999 	/*
1000 	 * now we need to free enough pv_entry structures to allow us to get
1001 	 * the kmem_map/kmem_object allocated and inited (done after this
1002 	 * function is finished).  to do this we allocate one bootstrap page out
1003 	 * of kernel_map and use it to provide an initial pool of pv_entry
1004 	 * structures.   we never free this page.
1005 	 */
1006 
1007 	pv_initpage = (struct pv_page *) uvm_km_alloc(kernel_map, PAGE_SIZE);
1008 	if (pv_initpage == NULL)
1009 		panic("pmap_init: pv_initpage");
1010 	pv_cachedva = 0;   /* a VA we have allocated but not used yet */
1011 	pv_nfpvents = 0;
1012 	(void) pmap_add_pvpage(pv_initpage, FALSE);
1013 
1014 	/*
1015 	 * done: pmap module is up (and ready for business)
1016 	 */
1017 
1018 	pmap_initialized = TRUE;
1019 }
1020 
1021 /*
1022  * p v _ e n t r y   f u n c t i o n s
1023  */
1024 
1025 /*
1026  * pv_entry allocation functions:
1027  *   the main pv_entry allocation functions are:
1028  *     pmap_alloc_pv: allocate a pv_entry structure
1029  *     pmap_free_pv: free one pv_entry
1030  *     pmap_free_pvs: free a list of pv_entrys
1031  *
1032  * the rest are helper functions
1033  */
1034 
1035 /*
1036  * pmap_alloc_pv: inline function to allocate a pv_entry structure
1037  * => we lock pvalloc_lock
1038  * => if we fail, we call out to pmap_alloc_pvpage
1039  * => 3 modes:
1040  *    ALLOCPV_NEED   = we really need a pv_entry, even if we have to steal it
1041  *    ALLOCPV_TRY    = we want a pv_entry, but not enough to steal
1042  *    ALLOCPV_NONEED = we are trying to grow our free list, don't really need
1043  *			one now
1044  *
1045  * "try" is for optional functions like pmap_copy().
1046  */
1047 
1048 struct pv_entry *
pmap_alloc_pv(pmap,mode)1049 pmap_alloc_pv(pmap, mode)
1050 	struct pmap *pmap;
1051 	int mode;
1052 {
1053 	struct pv_page *pvpage;
1054 	struct pv_entry *pv;
1055 
1056 	simple_lock(&pvalloc_lock);
1057 
1058 	if (pv_freepages.tqh_first != NULL) {
1059 		pvpage = pv_freepages.tqh_first;
1060 		pvpage->pvinfo.pvpi_nfree--;
1061 		if (pvpage->pvinfo.pvpi_nfree == 0) {
1062 			/* nothing left in this one? */
1063 			TAILQ_REMOVE(&pv_freepages, pvpage, pvinfo.pvpi_list);
1064 		}
1065 		pv = pvpage->pvinfo.pvpi_pvfree;
1066 #ifdef DIAGNOSTIC
1067 		if (pv == NULL)
1068 			panic("pmap_alloc_pv: pvpi_nfree off");
1069 #endif
1070 		pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1071 		pv_nfpvents--;  /* took one from pool */
1072 	} else {
1073 		pv = NULL;		/* need more of them */
1074 	}
1075 
1076 	/*
1077 	 * if below low water mark or we didn't get a pv_entry we try and
1078 	 * create more pv_entrys ...
1079 	 */
1080 
1081 	if (pv_nfpvents < PVE_LOWAT || pv == NULL) {
1082 		if (pv == NULL)
1083 			pv = pmap_alloc_pvpage(pmap, (mode == ALLOCPV_TRY) ?
1084 					       mode : ALLOCPV_NEED);
1085 		else
1086 			(void) pmap_alloc_pvpage(pmap, ALLOCPV_NONEED);
1087 	}
1088 
1089 	simple_unlock(&pvalloc_lock);
1090 	return(pv);
1091 }
1092 
1093 /*
1094  * pmap_alloc_pvpage: maybe allocate a new pvpage
1095  *
1096  * if need_entry is false: try and allocate a new pv_page
1097  * if need_entry is true: try and allocate a new pv_page and return a
1098  *	new pv_entry from it.   if we are unable to allocate a pv_page
1099  *	we make a last ditch effort to steal a pv_page from some other
1100  *	mapping.    if that fails, we panic...
1101  *
1102  * => we assume that the caller holds pvalloc_lock
1103  */
1104 
1105 static struct pv_entry *
pmap_alloc_pvpage(pmap,mode)1106 pmap_alloc_pvpage(pmap, mode)
1107 	struct pmap *pmap;
1108 	int mode;
1109 {
1110 	struct vm_page *pg;
1111 	struct pv_page *pvpage;
1112 	int lcv, idx, npg, s;
1113 	struct pv_entry *pv, *cpv, *prevpv;
1114 
1115 	/*
1116 	 * if we need_entry and we've got unused pv_pages, allocate from there
1117 	 */
1118 
1119 	if (mode != ALLOCPV_NONEED && pv_unusedpgs.tqh_first != NULL) {
1120 
1121 		/* move it to pv_freepages list */
1122 		pvpage = pv_unusedpgs.tqh_first;
1123 		TAILQ_REMOVE(&pv_unusedpgs, pvpage, pvinfo.pvpi_list);
1124 		TAILQ_INSERT_HEAD(&pv_freepages, pvpage, pvinfo.pvpi_list);
1125 
1126 		/* allocate a pv_entry */
1127 		pvpage->pvinfo.pvpi_nfree--;	/* can't go to zero */
1128 		pv = pvpage->pvinfo.pvpi_pvfree;
1129 #ifdef DIAGNOSTIC
1130 		if (pv == NULL)
1131 			panic("pmap_alloc_pvpage: pvpi_nfree off");
1132 #endif
1133 		pvpage->pvinfo.pvpi_pvfree = pv->pv_next;
1134 
1135 		pv_nfpvents--;  /* took one from pool */
1136 		return(pv);
1137 	}
1138 
1139 	/*
1140 	 *  see if we've got a cached unmapped VA that we can map a page in.
1141 	 * if not, try to allocate one.
1142 	 */
1143 
1144 	s = splimp();   /* must protect kmem_map/kmem_object with splimp! */
1145 	if (pv_cachedva == 0) {
1146 		pv_cachedva = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object,
1147 		    NBPG, UVM_KMF_TRYLOCK|UVM_KMF_VALLOC);
1148 		if (pv_cachedva == 0) {
1149 			splx(s);
1150 			goto steal_one;
1151 		}
1152 	}
1153 
1154 	/*
1155 	 * we have a VA, now let's try and allocate a page in the object
1156 	 * note: we are still holding splimp to protect kmem_object
1157 	 */
1158 
1159 	if (!simple_lock_try(&uvmexp.kmem_object->vmobjlock)) {
1160 		splx(s);
1161 		goto steal_one;
1162 	}
1163 
1164 	pg = uvm_pagealloc(uvmexp.kmem_object, pv_cachedva -
1165 			   vm_map_min(kernel_map),
1166 			   NULL, UVM_PGA_USERESERVE);
1167 	if (pg)
1168 		pg->flags &= ~PG_BUSY;	/* never busy */
1169 
1170 	simple_unlock(&uvmexp.kmem_object->vmobjlock);
1171 	splx(s);
1172 	/* splimp now dropped */
1173 
1174 	if (pg == NULL)
1175 		goto steal_one;
1176 
1177 	/*
1178 	 * add a mapping for our new pv_page and free its entrys (save one!)
1179 	 *
1180 	 * NOTE: If we are allocating a PV page for the kernel pmap, the
1181 	 * pmap is already locked!  (...but entering the mapping is safe...)
1182 	 */
1183 
1184 	pmap_kenter_pa(pv_cachedva, VM_PAGE_TO_PHYS(pg),
1185 	    VM_PROT_READ|VM_PROT_WRITE);
1186 	pvpage = (struct pv_page *) pv_cachedva;
1187 	pv_cachedva = 0;
1188 	return(pmap_add_pvpage(pvpage, mode != ALLOCPV_NONEED));
1189 
1190 steal_one:
1191 	/*
1192 	 * if we don't really need a pv_entry right now, we can just return.
1193 	 */
1194 
1195 	if (mode != ALLOCPV_NEED)
1196 		return(NULL);
1197 
1198 	/*
1199 	 * last ditch effort!   we couldn't allocate a free page to make
1200 	 * more pv_entrys so we try and steal one from someone else.
1201 	 */
1202 
1203 	pv = NULL;
1204 	for (lcv = 0 ; pv == NULL && lcv < vm_nphysseg ; lcv++) {
1205 		npg = vm_physmem[lcv].end - vm_physmem[lcv].start;
1206 		for (idx = 0 ; idx < npg ; idx++) {
1207 			struct pv_head *pvhead = vm_physmem[lcv].pmseg.pvhead;
1208 
1209 			if (pvhead->pvh_list == NULL)
1210 				continue;	/* spot check */
1211 			if (!simple_lock_try(&pvhead->pvh_lock))
1212 				continue;
1213 			cpv = prevpv = pvhead->pvh_list;
1214 			while (cpv) {
1215 				if (pmap_try_steal_pv(pvhead, cpv, prevpv))
1216 					break;
1217 				prevpv = cpv;
1218 				cpv = cpv->pv_next;
1219 			}
1220 			simple_unlock(&pvhead->pvh_lock);
1221 			/* got one?  break out of the loop! */
1222 			if (cpv) {
1223 				pv = cpv;
1224 				break;
1225 			}
1226 		}
1227 	}
1228 
1229 	return(pv);
1230 }
1231 
1232 /*
1233  * pmap_try_steal_pv: try and steal a pv_entry from a pmap
1234  *
1235  * => return true if we did it!
1236  */
1237 
1238 static boolean_t
pmap_try_steal_pv(pvh,cpv,prevpv)1239 pmap_try_steal_pv(pvh, cpv, prevpv)
1240 	struct pv_head *pvh;
1241 	struct pv_entry *cpv, *prevpv;
1242 {
1243 	pt_entry_t *ptep;	/* pointer to a PTE */
1244 
1245 	/*
1246 	 * we never steal kernel mappings or mappings from pmaps we can't lock
1247 	 */
1248 
1249 	if (cpv->pv_pmap == pmap_kernel() ||
1250 	    !simple_lock_try(&cpv->pv_pmap->pm_obj.vmobjlock))
1251 		return(FALSE);
1252 
1253 	/*
1254 	 * yes, we can try and steal it.   first we need to remove the
1255 	 * mapping from the pmap.
1256 	 */
1257 
1258 	ptep = pmap_tmpmap_pvepte(cpv);
1259 	if (*ptep & PG_W) {
1260 		ptep = NULL;	/* wired page, avoid stealing this one */
1261 	} else {
1262 		*ptep = 0;		/* zap! */
1263 		if (pmap_is_curpmap(cpv->pv_pmap))
1264 			pmap_update_pg(cpv->pv_va);
1265 		pmap_tmpunmap_pvepte(cpv);
1266 	}
1267 	if (ptep == NULL) {
1268 		simple_unlock(&cpv->pv_pmap->pm_obj.vmobjlock);
1269 		return(FALSE);	/* wired page, abort! */
1270 	}
1271 	cpv->pv_pmap->pm_stats.resident_count--;
1272 	if (cpv->pv_ptp && cpv->pv_ptp->wire_count)
1273 		/* drop PTP's wired count */
1274 		cpv->pv_ptp->wire_count--;
1275 
1276 	/*
1277 	 * XXX: if wire_count goes to one the PTP could be freed, however,
1278 	 * we'd have to lock the page queues (etc.) to do that and it could
1279 	 * cause deadlock headaches.   besides, the pmap we just stole from
1280 	 * may want the mapping back anyway, so leave the PTP around.
1281 	 */
1282 
1283 	/*
1284 	 * now we need to remove the entry from the pvlist
1285 	 */
1286 
1287 	if (cpv == pvh->pvh_list)
1288 		pvh->pvh_list = cpv->pv_next;
1289 	else
1290 		prevpv->pv_next = cpv->pv_next;
1291 	return(TRUE);
1292 }
1293 
1294 /*
1295  * pmap_add_pvpage: add a pv_page's pv_entrys to the free list
1296  *
1297  * => caller must hold pvalloc_lock
1298  * => if need_entry is true, we allocate and return one pv_entry
1299  */
1300 
1301 static struct pv_entry *
pmap_add_pvpage(pvp,need_entry)1302 pmap_add_pvpage(pvp, need_entry)
1303 	struct pv_page *pvp;
1304 	boolean_t need_entry;
1305 {
1306 	int tofree, lcv;
1307 
1308 	/* do we need to return one? */
1309 	tofree = (need_entry) ? PVE_PER_PVPAGE - 1 : PVE_PER_PVPAGE;
1310 
1311 	pvp->pvinfo.pvpi_pvfree = NULL;
1312 	pvp->pvinfo.pvpi_nfree = tofree;
1313 	for (lcv = 0 ; lcv < tofree ; lcv++) {
1314 		pvp->pvents[lcv].pv_next = pvp->pvinfo.pvpi_pvfree;
1315 		pvp->pvinfo.pvpi_pvfree = &pvp->pvents[lcv];
1316 	}
1317 	if (need_entry)
1318 		TAILQ_INSERT_TAIL(&pv_freepages, pvp, pvinfo.pvpi_list);
1319 	else
1320 		TAILQ_INSERT_TAIL(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1321 	pv_nfpvents += tofree;
1322 	return((need_entry) ? &pvp->pvents[lcv] : NULL);
1323 }
1324 
1325 /*
1326  * pmap_free_pv_doit: actually free a pv_entry
1327  *
1328  * => do not call this directly!  instead use either
1329  *    1. pmap_free_pv ==> free a single pv_entry
1330  *    2. pmap_free_pvs => free a list of pv_entrys
1331  * => we must be holding pvalloc_lock
1332  */
1333 
1334 void
pmap_free_pv_doit(pv)1335 pmap_free_pv_doit(pv)
1336 	struct pv_entry *pv;
1337 {
1338 	struct pv_page *pvp;
1339 
1340 	pvp = (struct pv_page *) i386_trunc_page(pv);
1341 	pv_nfpvents++;
1342 	pvp->pvinfo.pvpi_nfree++;
1343 
1344 	/* nfree == 1 => fully allocated page just became partly allocated */
1345 	if (pvp->pvinfo.pvpi_nfree == 1) {
1346 		TAILQ_INSERT_HEAD(&pv_freepages, pvp, pvinfo.pvpi_list);
1347 	}
1348 
1349 	/* free it */
1350 	pv->pv_next = pvp->pvinfo.pvpi_pvfree;
1351 	pvp->pvinfo.pvpi_pvfree = pv;
1352 
1353 	/*
1354 	 * are all pv_page's pv_entry's free?  move it to unused queue.
1355 	 */
1356 
1357 	if (pvp->pvinfo.pvpi_nfree == PVE_PER_PVPAGE) {
1358 		TAILQ_REMOVE(&pv_freepages, pvp, pvinfo.pvpi_list);
1359 		TAILQ_INSERT_HEAD(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1360 	}
1361 }
1362 
1363 /*
1364  * pmap_free_pv: free a single pv_entry
1365  *
1366  * => we gain the pvalloc_lock
1367  */
1368 
1369 void
pmap_free_pv(pmap,pv)1370 pmap_free_pv(pmap, pv)
1371 	struct pmap *pmap;
1372 	struct pv_entry *pv;
1373 {
1374 	simple_lock(&pvalloc_lock);
1375 	pmap_free_pv_doit(pv);
1376 
1377 	/*
1378 	 * Can't free the PV page if the PV entries were associated with
1379 	 * the kernel pmap; the pmap is already locked.
1380 	 */
1381 	if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1382 	    pmap != pmap_kernel())
1383 		pmap_free_pvpage();
1384 
1385 	simple_unlock(&pvalloc_lock);
1386 }
1387 
1388 /*
1389  * pmap_free_pvs: free a list of pv_entrys
1390  *
1391  * => we gain the pvalloc_lock
1392  */
1393 
1394 void
pmap_free_pvs(pmap,pvs)1395 pmap_free_pvs(pmap, pvs)
1396 	struct pmap *pmap;
1397 	struct pv_entry *pvs;
1398 {
1399 	struct pv_entry *nextpv;
1400 
1401 	simple_lock(&pvalloc_lock);
1402 
1403 	for ( /* null */ ; pvs != NULL ; pvs = nextpv) {
1404 		nextpv = pvs->pv_next;
1405 		pmap_free_pv_doit(pvs);
1406 	}
1407 
1408 	/*
1409 	 * Can't free the PV page if the PV entries were associated with
1410 	 * the kernel pmap; the pmap is already locked.
1411 	 */
1412 	if (pv_nfpvents > PVE_HIWAT && TAILQ_FIRST(&pv_unusedpgs) != NULL &&
1413 	    pmap != pmap_kernel())
1414 		pmap_free_pvpage();
1415 
1416 	simple_unlock(&pvalloc_lock);
1417 }
1418 
1419 
1420 /*
1421  * pmap_free_pvpage: try and free an unused pv_page structure
1422  *
1423  * => assume caller is holding the pvalloc_lock and that
1424  *	there is a page on the pv_unusedpgs list
1425  * => if we can't get a lock on the kmem_map we try again later
1426  * => note: analysis of MI kmem_map usage [i.e. malloc/free] shows
1427  *	that if we can lock the kmem_map then we are not already
1428  *	holding kmem_object's lock.
1429  */
1430 
1431 static void
pmap_free_pvpage()1432 pmap_free_pvpage()
1433 {
1434 	int s;
1435 	struct vm_map *map;
1436 	struct vm_map_entry *dead_entries;
1437 	struct pv_page *pvp;
1438 
1439 	s = splvm(); /* protect kmem_map */
1440 	pvp = TAILQ_FIRST(&pv_unusedpgs);
1441 
1442 	/*
1443 	 * note: watch out for pv_initpage which is allocated out of
1444 	 * kernel_map rather than kmem_map.
1445 	 */
1446 
1447 	if (pvp == pv_initpage)
1448 		map = kernel_map;
1449 	else
1450 		map = kmem_map;
1451 	if (vm_map_lock_try(map)) {
1452 
1453 		/* remove pvp from pv_unusedpgs */
1454 		TAILQ_REMOVE(&pv_unusedpgs, pvp, pvinfo.pvpi_list);
1455 
1456 		/* unmap the page */
1457 		dead_entries = NULL;
1458 		uvm_unmap_remove(map, (vaddr_t)pvp, ((vaddr_t)pvp) + PAGE_SIZE,
1459 		    &dead_entries);
1460 		vm_map_unlock(map);
1461 
1462 		if (dead_entries != NULL)
1463 			uvm_unmap_detach(dead_entries, 0);
1464 
1465 		pv_nfpvents -= PVE_PER_PVPAGE;  /* update free count */
1466 	}
1467 
1468 	if (pvp == pv_initpage)
1469 		/* no more initpage, we've freed it */
1470 		pv_initpage = NULL;
1471 
1472 	splx(s);
1473 }
1474 
/*
 * main pv_entry manipulation functions:
 *   pmap_enter_pv: enter a mapping onto a pv_head list
 *   pmap_remove_pv: remove a mapping from a pv_head list
 *
 * NOTE: pmap_enter_pv expects to lock the pvh itself
 *       pmap_remove_pv expects the caller to lock the pvh before calling
 */
1483 
1484 /*
1485  * pmap_enter_pv: enter a mapping onto a pv_head lst
1486  *
1487  * => caller should hold the proper lock on pmap_main_lock
1488  * => caller should have pmap locked
1489  * => we will gain the lock on the pv_head and allocate the new pv_entry
1490  * => caller should adjust ptp's wire_count before calling
1491  */
1492 
1493 void
pmap_enter_pv(pvh,pve,pmap,va,ptp)1494 pmap_enter_pv(pvh, pve, pmap, va, ptp)
1495 	struct pv_head *pvh;
1496 	struct pv_entry *pve;	/* preallocated pve for us to use */
1497 	struct pmap *pmap;
1498 	vaddr_t va;
1499 	struct vm_page *ptp;	/* PTP in pmap that maps this VA */
1500 {
1501 	pve->pv_pmap = pmap;
1502 	pve->pv_va = va;
1503 	pve->pv_ptp = ptp;			/* NULL for kernel pmap */
1504 	simple_lock(&pvh->pvh_lock);		/* lock pv_head */
1505 	pve->pv_next = pvh->pvh_list;		/* add to ... */
1506 	pvh->pvh_list = pve;			/* ... locked list */
1507 	simple_unlock(&pvh->pvh_lock);		/* unlock, done! */
1508 }
1509 
1510 /*
1511  * pmap_remove_pv: try to remove a mapping from a pv_list
1512  *
1513  * => caller should hold proper lock on pmap_main_lock
1514  * => pmap should be locked
1515  * => caller should hold lock on pv_head [so that attrs can be adjusted]
1516  * => caller should adjust ptp's wire_count and free PTP if needed
1517  * => we return the removed pve
1518  */
1519 
1520 struct pv_entry *
pmap_remove_pv(pvh,pmap,va)1521 pmap_remove_pv(pvh, pmap, va)
1522 	struct pv_head *pvh;
1523 	struct pmap *pmap;
1524 	vaddr_t va;
1525 {
1526 	struct pv_entry *pve, **prevptr;
1527 
1528 	prevptr = &pvh->pvh_list;		/* previous pv_entry pointer */
1529 	pve = *prevptr;
1530 	while (pve) {
1531 		if (pve->pv_pmap == pmap && pve->pv_va == va) {	/* match? */
1532 			*prevptr = pve->pv_next;		/* remove it! */
1533 			break;
1534 		}
1535 		prevptr = &pve->pv_next;		/* previous pointer */
1536 		pve = pve->pv_next;			/* advance */
1537 	}
1538 	return(pve);				/* return removed pve */
1539 }
1540 
1541 /*
1542  * p t p   f u n c t i o n s
1543  */
1544 
1545 /*
1546  * pmap_alloc_ptp: allocate a PTP for a PMAP
1547  *
1548  * => pmap should already be locked by caller
1549  * => we use the ptp's wire_count to count the number of active mappings
1550  *	in the PTP (we start it at one to prevent any chance this PTP
1551  *	will ever leak onto the active/inactive queues)
1552  * => we should not be holding any pv_head locks (in case we are forced
1553  *	to call pmap_steal_ptp())
1554  * => we may need to lock pv_head's if we have to steal a PTP
1555  * => just_try: true if we want a PTP, but not enough to steal one
1556  * 	from another pmap (e.g. during optional functions like pmap_copy)
1557  */
1558 
1559 struct vm_page *
pmap_alloc_ptp(pmap,pde_index,just_try)1560 pmap_alloc_ptp(pmap, pde_index, just_try)
1561 	struct pmap *pmap;
1562 	int pde_index;
1563 	boolean_t just_try;
1564 {
1565 	struct vm_page *ptp;
1566 
1567 	ptp = uvm_pagealloc(&pmap->pm_obj, ptp_i2o(pde_index), NULL,
1568 			    UVM_PGA_USERESERVE|UVM_PGA_ZERO);
1569 	if (ptp == NULL) {
1570 		if (just_try)
1571 			return(NULL);
1572 		ptp = pmap_steal_ptp(&pmap->pm_obj, ptp_i2o(pde_index));
1573 		if (ptp == NULL) {
1574 			return (NULL);
1575 		}
1576 		/* stole one; zero it. */
1577 		pmap_zero_page(ptp);
1578 	}
1579 
1580 	/* got one! */
1581 	ptp->flags &= ~PG_BUSY;	/* never busy */
1582 	ptp->wire_count = 1;	/* no mappings yet */
1583 	pmap->pm_pdir[pde_index] =
1584 		(pd_entry_t) (VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V);
1585 	pmap->pm_stats.resident_count++;	/* count PTP as resident */
1586 	pmap->pm_ptphint = ptp;
1587 	return(ptp);
1588 }
1589 
1590 /*
1591  * pmap_steal_ptp: steal a PTP from any pmap that we can access
1592  *
1593  * => obj is locked by caller.
1594  * => we can throw away mappings at this level (except in the kernel's pmap)
1595  * => stolen PTP is placed in <obj,offset> pmap
1596  * => we lock pv_head's
1597  * => hopefully, this function will be seldom used [much better to have
1598  *	enough free pages around for us to allocate off the free page list]
1599  */
1600 
1601 static struct vm_page *
pmap_steal_ptp(obj,offset)1602 pmap_steal_ptp(obj, offset)
1603 	struct uvm_object *obj;
1604 	vaddr_t offset;
1605 {
1606 	struct vm_page *ptp = NULL;
1607 	struct pmap *firstpmap;
1608 	struct uvm_object *curobj;
1609 	pt_entry_t *ptes;
1610 	int idx, lcv;
1611 	boolean_t caller_locked, we_locked;
1612 
1613 	simple_lock(&pmaps_lock);
1614 	if (pmaps_hand == NULL)
1615 		pmaps_hand = LIST_FIRST(&pmaps);
1616 	firstpmap = pmaps_hand;
1617 
1618 	do { /* while we haven't looped back around to firstpmap */
1619 
1620 		curobj = &pmaps_hand->pm_obj;
1621 		we_locked = FALSE;
1622 		caller_locked = (curobj == obj);
1623 		if (!caller_locked) {
1624 			we_locked = simple_lock_try(&curobj->vmobjlock);
1625 		}
1626 		if (caller_locked || we_locked) {
1627 			ptp = curobj->memq.tqh_first;
1628 			for (/*null*/; ptp != NULL; ptp = ptp->listq.tqe_next) {
1629 
1630 				/*
1631 				 * might have found a PTP we can steal
1632 				 * (unless it has wired pages).
1633 				 */
1634 
1635 				idx = ptp_o2i(ptp->offset);
1636 #ifdef DIAGNOSTIC
1637 				if (VM_PAGE_TO_PHYS(ptp) !=
1638 				    (pmaps_hand->pm_pdir[idx] & PG_FRAME))
1639 					panic("pmap_steal_ptp: PTP mismatch!");
1640 #endif
1641 
1642 				ptes = (pt_entry_t *)
1643 					pmap_tmpmap_pa(VM_PAGE_TO_PHYS(ptp));
1644 				for (lcv = 0 ; lcv < PTES_PER_PTP ; lcv++)
1645 					if ((ptes[lcv] & (PG_V|PG_W)) ==
1646 					    (PG_V|PG_W))
1647 						break;
1648 				if (lcv == PTES_PER_PTP)
1649 					pmap_remove_ptes(pmaps_hand, NULL, ptp,
1650 							 (vaddr_t)ptes,
1651 							 ptp_i2v(idx),
1652 							 ptp_i2v(idx+1));
1653 				pmap_tmpunmap_pa();
1654 
1655 				if (lcv != PTES_PER_PTP)
1656 					/* wired, try next PTP */
1657 					continue;
1658 
1659 				/*
1660 				 * got it!!!
1661 				 */
1662 
1663 				pmaps_hand->pm_pdir[idx] = 0;	/* zap! */
1664 				pmaps_hand->pm_stats.resident_count--;
1665 				if (pmap_is_curpmap(pmaps_hand))
1666 					tlbflush();
1667 				else if (pmap_valid_entry(*APDP_PDE) &&
1668 					 (*APDP_PDE & PG_FRAME) ==
1669 					 pmaps_hand->pm_pdirpa) {
1670 					pmap_update_pg(((vaddr_t)APTE_BASE) +
1671 						       ptp->offset);
1672 				}
1673 
1674 				/* put it in our pmap! */
1675 				uvm_pagerealloc(ptp, obj, offset);
1676 				break;	/* break out of "for" loop */
1677 			}
1678 			if (we_locked) {
1679 				simple_unlock(&curobj->vmobjlock);
1680 			}
1681 		}
1682 
1683 		/* advance the pmaps_hand */
1684 		pmaps_hand = LIST_NEXT(pmaps_hand, pm_list);
1685 		if (pmaps_hand == NULL) {
1686 			pmaps_hand = LIST_FIRST(&pmaps);
1687 		}
1688 
1689 	} while (ptp == NULL && pmaps_hand != firstpmap);
1690 
1691 	simple_unlock(&pmaps_lock);
1692 	return(ptp);
1693 }
1694 
1695 /*
1696  * pmap_get_ptp: get a PTP (if there isn't one, allocate a new one)
1697  *
1698  * => pmap should NOT be pmap_kernel()
1699  * => pmap should be locked
1700  */
1701 
1702 static struct vm_page *
pmap_get_ptp(pmap,pde_index,just_try)1703 pmap_get_ptp(pmap, pde_index, just_try)
1704 	struct pmap *pmap;
1705 	int pde_index;
1706 	boolean_t just_try;
1707 {
1708 	struct vm_page *ptp;
1709 
1710 	if (pmap_valid_entry(pmap->pm_pdir[pde_index])) {
1711 
1712 		/* valid... check hint (saves us a PA->PG lookup) */
1713 		if (pmap->pm_ptphint &&
1714 		    (pmap->pm_pdir[pde_index] & PG_FRAME) ==
1715 		    VM_PAGE_TO_PHYS(pmap->pm_ptphint))
1716 			return(pmap->pm_ptphint);
1717 
1718 		ptp = uvm_pagelookup(&pmap->pm_obj, ptp_i2o(pde_index));
1719 #ifdef DIAGNOSTIC
1720 		if (ptp == NULL)
1721 			panic("pmap_get_ptp: unmanaged user PTP");
1722 #endif
1723 		pmap->pm_ptphint = ptp;
1724 		return(ptp);
1725 	}
1726 
1727 	/* allocate a new PTP (updates ptphint) */
1728 	return(pmap_alloc_ptp(pmap, pde_index, just_try));
1729 }
1730 
1731 /*
1732  * p m a p  l i f e c y c l e   f u n c t i o n s
1733  */
1734 
1735 /*
1736  * pmap_create: create a pmap
1737  *
1738  * => note: old pmap interface took a "size" args which allowed for
1739  *	the creation of "software only" pmaps (not in bsd).
1740  */
1741 
1742 struct pmap *
pmap_create()1743 pmap_create()
1744 {
1745 	struct pmap *pmap;
1746 
1747 	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
1748 	pmap_pinit(pmap);
1749 	return(pmap);
1750 }
1751 
1752 /*
1753  * pmap_pinit: given a zero'd pmap structure, init it.
1754  */
1755 
1756 void
pmap_pinit(pmap)1757 pmap_pinit(pmap)
1758 	struct pmap *pmap;
1759 {
1760 	/* init uvm_object */
1761 	simple_lock_init(&pmap->pm_obj.vmobjlock);
1762 	pmap->pm_obj.pgops = NULL;	/* currently not a mappable object */
1763 	TAILQ_INIT(&pmap->pm_obj.memq);
1764 	pmap->pm_obj.uo_npages = 0;
1765 	pmap->pm_obj.uo_refs = 1;
1766 	pmap->pm_stats.wired_count = 0;
1767 	pmap->pm_stats.resident_count = 1;	/* count the PDP allocd below */
1768 	pmap->pm_ptphint = NULL;
1769 	pmap->pm_hiexec = 0;
1770 	pmap->pm_flags = 0;
1771 
1772 	/* allocate PDP */
1773 	pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG);
1774 	if (pmap->pm_pdir == NULL)
1775 		panic("pmap_pinit: kernel_map out of virtual space!");
1776 	(void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir,
1777 			    (paddr_t *)&pmap->pm_pdirpa);
1778 
1779 	/* init PDP */
1780 	/* zero init area */
1781 	bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t));
1782 	/* put in recursive PDE to map the PTEs */
1783 	pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW;
1784 
1785 	/* init the LDT */
1786 	pmap->pm_ldt = NULL;
1787 	pmap->pm_ldt_len = 0;
1788 	pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1789 
1790 	/*
1791 	 * we need to lock pmaps_lock to prevent nkpde from changing on
1792 	 * us.   note that there is no need to splimp to protect us from
1793 	 * malloc since malloc allocates out of a submap and we should have
1794 	 * already allocated kernel PTPs to cover the range...
1795 	 */
1796 	simple_lock(&pmaps_lock);
1797 	/* put in kernel VM PDEs */
1798 	bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN],
1799 	       nkpde * sizeof(pd_entry_t));
1800 	/* zero the rest */
1801 	bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde],
1802 	       NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t)));
1803 	LIST_INSERT_HEAD(&pmaps, pmap, pm_list);
1804 	simple_unlock(&pmaps_lock);
1805 }
1806 
1807 /*
1808  * pmap_destroy: drop reference count on pmap.   free pmap if
1809  *	reference count goes to zero.
1810  */
1811 
1812 void
pmap_destroy(pmap)1813 pmap_destroy(pmap)
1814 	struct pmap *pmap;
1815 {
1816 	int refs;
1817 
1818 	/*
1819 	 * drop reference count
1820 	 */
1821 
1822 	simple_lock(&pmap->pm_obj.vmobjlock);
1823 	refs = --pmap->pm_obj.uo_refs;
1824 	simple_unlock(&pmap->pm_obj.vmobjlock);
1825 	if (refs > 0) {
1826 		return;
1827 	}
1828 
1829 	/*
1830 	 * reference count is zero, free pmap resources and then free pmap.
1831 	 */
1832 
1833 	pmap_release(pmap);
1834 	pool_put(&pmap_pmap_pool, pmap);
1835 }
1836 
1837 /*
1838  * pmap_release: release all resources held by a pmap
1839  *
1840  * => if pmap is still referenced it should be locked
1841  * => XXX: we currently don't expect any busy PTPs because we don't
1842  *    allow anything to map them (except for the kernel's private
1843  *    recursive mapping) or make them busy.
1844  */
1845 
1846 void
pmap_release(pmap)1847 pmap_release(pmap)
1848 	struct pmap *pmap;
1849 {
1850 	struct vm_page *pg;
1851 
1852 	/*
1853 	 * remove it from global list of pmaps
1854 	 */
1855 
1856 	simple_lock(&pmaps_lock);
1857 	if (pmap == pmaps_hand)
1858 		pmaps_hand = LIST_NEXT(pmaps_hand, pm_list);
1859 	LIST_REMOVE(pmap, pm_list);
1860 	simple_unlock(&pmaps_lock);
1861 
1862 	/*
1863 	 * free any remaining PTPs
1864 	 */
1865 
1866 	while (pmap->pm_obj.memq.tqh_first != NULL) {
1867 		pg = pmap->pm_obj.memq.tqh_first;
1868 #ifdef DIAGNOSTIC
1869 		if (pg->flags & PG_BUSY)
1870 			panic("pmap_release: busy page table page");
1871 #endif
1872 		/* pmap_page_protect?  currently no need for it. */
1873 
1874 		pg->wire_count = 0;
1875 		uvm_pagefree(pg);
1876 	}
1877 
1878 	/* XXX: need to flush it out of other processor's APTE space? */
1879 	uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG);
1880 
1881 #ifdef USER_LDT
1882 	if (pmap->pm_flags & PMF_USER_LDT) {
1883 		/*
1884 		 * no need to switch the LDT; this address space is gone,
1885 		 * nothing is using it.
1886 		 *
1887 		 * No need to lock the pmap for ldt_free (or anything else),
1888 		 * we're the last one to use it.
1889 		 */
1890 		ldt_free(pmap);
1891 		uvm_km_free(kernel_map, (vaddr_t)pmap->pm_ldt,
1892 			    pmap->pm_ldt_len * sizeof(union descriptor));
1893 	}
1894 #endif
1895 }
1896 
1897 /*
1898  *	Add a reference to the specified pmap.
1899  */
1900 
1901 void
pmap_reference(pmap)1902 pmap_reference(pmap)
1903 	struct pmap *pmap;
1904 {
1905 	simple_lock(&pmap->pm_obj.vmobjlock);
1906 	pmap->pm_obj.uo_refs++;
1907 	simple_unlock(&pmap->pm_obj.vmobjlock);
1908 }
1909 
1910 #if defined(PMAP_FORK)
1911 /*
1912  * pmap_fork: perform any necessary data structure manipulation when
1913  * a VM space is forked.
1914  */
1915 
1916 void
pmap_fork(pmap1,pmap2)1917 pmap_fork(pmap1, pmap2)
1918 	struct pmap *pmap1, *pmap2;
1919 {
1920 	simple_lock(&pmap1->pm_obj.vmobjlock);
1921 	simple_lock(&pmap2->pm_obj.vmobjlock);
1922 
1923 #ifdef USER_LDT
1924 	/* Copy the LDT, if necessary. */
1925 	if (pmap1->pm_flags & PMF_USER_LDT) {
1926 		union descriptor *new_ldt;
1927 		size_t len;
1928 
1929 		len = pmap1->pm_ldt_len * sizeof(union descriptor);
1930 		new_ldt = (union descriptor *)uvm_km_alloc(kernel_map, len);
1931 		bcopy(pmap1->pm_ldt, new_ldt, len);
1932 		pmap2->pm_ldt = new_ldt;
1933 		pmap2->pm_ldt_len = pmap1->pm_ldt_len;
1934 		pmap2->pm_flags |= PMF_USER_LDT;
1935 		ldt_alloc(pmap2, new_ldt, len);
1936 	}
1937 #endif /* USER_LDT */
1938 
1939 	simple_unlock(&pmap2->pm_obj.vmobjlock);
1940 	simple_unlock(&pmap1->pm_obj.vmobjlock);
1941 }
1942 #endif /* PMAP_FORK */
1943 
1944 #ifdef USER_LDT
1945 /*
1946  * pmap_ldt_cleanup: if the pmap has a local LDT, deallocate it, and
1947  * restore the default.
1948  */
1949 
1950 void
pmap_ldt_cleanup(p)1951 pmap_ldt_cleanup(p)
1952 	struct proc *p;
1953 {
1954 	struct pcb *pcb = &p->p_addr->u_pcb;
1955 	pmap_t pmap = p->p_vmspace->vm_map.pmap;
1956 	union descriptor *old_ldt = NULL;
1957 	size_t len = 0;
1958 
1959 	simple_lock(&pmap->pm_obj.vmobjlock);
1960 
1961 	if (pmap->pm_flags & PMF_USER_LDT) {
1962 		ldt_free(pmap);
1963 		pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
1964 		pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1965 		if (pcb == curpcb)
1966 			lldt(pcb->pcb_ldt_sel);
1967 		old_ldt = pmap->pm_ldt;
1968 		len = pmap->pm_ldt_len * sizeof(union descriptor);
1969 		pmap->pm_ldt = NULL;
1970 		pmap->pm_ldt_len = 0;
1971 		pmap->pm_flags &= ~PMF_USER_LDT;
1972 	}
1973 
1974 	simple_unlock(&pmap->pm_obj.vmobjlock);
1975 
1976 	if (old_ldt != NULL)
1977 		uvm_km_free(kernel_map, (vaddr_t)old_ldt, len);
1978 }
1979 #endif /* USER_LDT */
1980 
1981 /*
1982  * pmap_activate: activate a process' pmap (fill in %cr3 and LDT info)
1983  *
1984  * => called from cpu_switch()
1985  * => if proc is the curproc, then load it into the MMU
1986  */
1987 
1988 void
pmap_activate(p)1989 pmap_activate(p)
1990 	struct proc *p;
1991 {
1992 	struct pcb *pcb = &p->p_addr->u_pcb;
1993 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1994 
1995 	pcb->pcb_pmap = pmap;
1996 	pcb->pcb_ldt_sel = pmap->pm_ldt_sel;
1997 	pcb->pcb_cr3 = pmap->pm_pdirpa;
1998 	if (p == curproc)
1999 		lcr3(pcb->pcb_cr3);
2000 	if (pcb == curpcb)
2001 		lldt(pcb->pcb_ldt_sel);
2002 }
2003 
/*
 * pmap_deactivate: deactivate a process' pmap
 *
 * => currently a no-op
 * => XXX: what should this do, if anything?
 */

void
pmap_deactivate(p)
	struct proc *p;
{
	/* nothing to do */
}
2015 
2016 /*
2017  * end of lifecycle functions
2018  */
2019 
2020 /*
2021  * some misc. functions
2022  */
2023 
2024 /*
2025  * pmap_extract: extract a PA for the given VA
2026  */
2027 
2028 boolean_t
pmap_extract(pmap,va,pap)2029 pmap_extract(pmap, va, pap)
2030 	struct pmap *pmap;
2031 	vaddr_t va;
2032 	paddr_t *pap;
2033 {
2034 	paddr_t retval;
2035 	pt_entry_t *ptes;
2036 
2037 	if (pmap->pm_pdir[pdei(va)]) {
2038 		ptes = pmap_map_ptes(pmap);
2039 		retval = (paddr_t)(ptes[i386_btop(va)] & PG_FRAME);
2040 		pmap_unmap_ptes(pmap);
2041 		if (pap != NULL)
2042 			*pap = retval | (va & ~PG_FRAME);
2043 		return (TRUE);
2044 	}
2045 	return (FALSE);
2046 }
2047 
2048 /*
2049  * pmap_virtual_space: used during bootup [pmap_steal_memory] to
2050  *	determine the bounds of the kernel virtual addess space.
2051  */
2052 
2053 void
pmap_virtual_space(startp,endp)2054 pmap_virtual_space(startp, endp)
2055 	vaddr_t *startp;
2056 	vaddr_t *endp;
2057 {
2058 	*startp = virtual_avail;
2059 	*endp = virtual_end;
2060 }
2061 
2062 /*
2063  * pmap_zero_page: zero a page
2064  */
2065 void (*pagezero)(void *, size_t) = bzero;
2066 
2067 void
pmap_zero_page(struct vm_page * pg)2068 pmap_zero_page(struct vm_page *pg)
2069 {
2070 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2071 
2072 	simple_lock(&pmap_zero_page_lock);
2073 #ifdef DIAGNOSTIC
2074 	if (*zero_pte)
2075 		panic("pmap_zero_page: lock botch");
2076 #endif
2077 
2078 	*zero_pte = (pa & PG_FRAME) | PG_V | PG_RW;	/* map in */
2079 	pagezero(zerop, PAGE_SIZE);				/* zero */
2080 	*zero_pte = 0;				/* zap! */
2081 	pmap_update_pg((vaddr_t)zerop);		/* flush TLB */
2082 	simple_unlock(&pmap_zero_page_lock);
2083 }
2084 
2085 /*
2086  * pmap_zero_phys: same as pmap_zero_page, but for use before vm_pages are
2087  * initialized.
2088  */
2089 void
pmap_zero_phys(paddr_t pa)2090 pmap_zero_phys(paddr_t pa)
2091 {
2092 	simple_lock(&pmap_zero_page_lock);
2093 #ifdef DIAGNOSTIC
2094 	if (*zero_pte)
2095 		panic("pmap_zero_page: lock botch");
2096 #endif
2097 
2098 	*zero_pte = (pa & PG_FRAME) | PG_V | PG_RW;	/* map in */
2099 	pagezero(zerop, PAGE_SIZE);				/* zero */
2100 	*zero_pte = 0;				/* zap! */
2101 	pmap_update_pg((vaddr_t)zerop);		/* flush TLB */
2102 	simple_unlock(&pmap_zero_page_lock);
2103 }
2104 
2105 /*
2106  * pmap_zero_page_uncached: the same, except uncached.
2107  */
2108 
2109 boolean_t
pmap_zero_page_uncached(pa)2110 pmap_zero_page_uncached(pa)
2111 	paddr_t pa;
2112 {
2113 	simple_lock(&pmap_zero_page_lock);
2114 #ifdef DIAGNOSTIC
2115 	if (*zero_pte)
2116 		panic("pmap_zero_page_uncached: lock botch");
2117 #endif
2118 
2119 	*zero_pte = (pa & PG_FRAME) | PG_V | PG_RW |	/* map in */
2120 	    ((cpu_class != CPUCLASS_386) ? PG_N : 0);
2121 	pagezero(zerop, PAGE_SIZE);				/* zero */
2122 	*zero_pte = 0;					/* zap! */
2123 	pmap_update_pg((vaddr_t)zerop);			/* flush TLB */
2124 	simple_unlock(&pmap_zero_page_lock);
2125 
2126 	return (TRUE);
2127 }
2128 
2129 /*
2130  * pmap_copy_page: copy a page
2131  */
2132 
2133 void
pmap_copy_page(struct vm_page * srcpg,struct vm_page * dstpg)2134 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
2135 {
2136 	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
2137 	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
2138 
2139 	simple_lock(&pmap_copy_page_lock);
2140 #ifdef DIAGNOSTIC
2141 	if (*csrc_pte || *cdst_pte)
2142 		panic("pmap_copy_page: lock botch");
2143 #endif
2144 
2145 	*csrc_pte = (srcpa & PG_FRAME) | PG_V | PG_RW;
2146 	*cdst_pte = (dstpa & PG_FRAME) | PG_V | PG_RW;
2147 	bcopy(csrcp, cdstp, PAGE_SIZE);
2148 	*csrc_pte = *cdst_pte = 0;			/* zap! */
2149 	pmap_update_2pg((vaddr_t)csrcp, (vaddr_t)cdstp);
2150 	simple_unlock(&pmap_copy_page_lock);
2151 }
2152 
2153 /*
2154  * p m a p   r e m o v e   f u n c t i o n s
2155  *
2156  * functions that remove mappings
2157  */
2158 
2159 /*
2160  * pmap_remove_ptes: remove PTEs from a PTP
2161  *
2162  * => must have proper locking on pmap_master_lock
2163  * => caller must hold pmap's lock
2164  * => PTP must be mapped into KVA
2165  * => PTP should be null if pmap == pmap_kernel()
2166  */
2167 
2168 static void
pmap_remove_ptes(pmap,pmap_rr,ptp,ptpva,startva,endva)2169 pmap_remove_ptes(pmap, pmap_rr, ptp, ptpva, startva, endva)
2170 	struct pmap *pmap;
2171 	struct pmap_remove_record *pmap_rr;
2172 	struct vm_page *ptp;
2173 	vaddr_t ptpva;
2174 	vaddr_t startva, endva;
2175 {
2176 	struct pv_entry *pv_tofree = NULL;	/* list of pv_entrys to free */
2177 	struct pv_entry *pve;
2178 	pt_entry_t *pte = (pt_entry_t *) ptpva;
2179 	pt_entry_t opte;
2180 	int bank, off;
2181 
2182 	/*
2183 	 * note that ptpva points to the PTE that maps startva.   this may
2184 	 * or may not be the first PTE in the PTP.
2185 	 *
2186 	 * we loop through the PTP while there are still PTEs to look at
2187 	 * and the wire_count is greater than 1 (because we use the wire_count
2188 	 * to keep track of the number of real PTEs in the PTP).
2189 	 */
2190 
2191 	for (/*null*/; startva < endva && (ptp == NULL || ptp->wire_count > 1)
2192 			     ; pte++, startva += NBPG) {
2193 		if (!pmap_valid_entry(*pte))
2194 			continue;			/* VA not mapped */
2195 
2196 		opte = *pte;		/* save the old PTE */
2197 		*pte = 0;			/* zap! */
2198 		if (opte & PG_W)
2199 			pmap->pm_stats.wired_count--;
2200 		pmap->pm_stats.resident_count--;
2201 
2202 		if (pmap_rr) {		/* worried about tlb flushing? */
2203 			if (opte & PG_G) {
2204 				/* PG_G requires this */
2205 				pmap_update_pg(startva);
2206 			} else {
2207 				if (pmap_rr->prr_npages < PMAP_RR_MAX) {
2208 					pmap_rr->prr_vas[pmap_rr->prr_npages++]
2209 						= startva;
2210 				} else {
2211 					if (pmap_rr->prr_npages == PMAP_RR_MAX)
2212 						/* signal an overflow */
2213 						pmap_rr->prr_npages++;
2214 				}
2215 			}
2216 		}
2217 		if (ptp)
2218 			ptp->wire_count--;		/* dropping a PTE */
2219 
2220 		/*
2221 		 * if we are not on a pv_head list we are done.
2222 		 */
2223 
2224 		if ((opte & PG_PVLIST) == 0) {
2225 #ifdef DIAGNOSTIC
2226 			if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off)
2227 			    != -1)
2228 				panic("pmap_remove_ptes: managed page without "
2229 				      "PG_PVLIST for 0x%lx", startva);
2230 #endif
2231 			continue;
2232 		}
2233 
2234 		bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off);
2235 #ifdef DIAGNOSTIC
2236 		if (bank == -1)
2237 			panic("pmap_remove_ptes: unmanaged page marked "
2238 			      "PG_PVLIST, va = 0x%lx, pa = 0x%lx",
2239 			      startva, (u_long)(opte & PG_FRAME));
2240 #endif
2241 
2242 		/* sync R/M bits */
2243 		simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
2244 		vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
2245 		pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap,
2246 				     startva);
2247 		simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
2248 
2249 		if (pve) {
2250 			pve->pv_next = pv_tofree;
2251 			pv_tofree = pve;
2252 		}
2253 
2254 		/* end of "for" loop: time for next pte */
2255 	}
2256 	if (pv_tofree)
2257 		pmap_free_pvs(pmap, pv_tofree);
2258 }
2259 
2260 
2261 /*
2262  * pmap_remove_pte: remove a single PTE from a PTP
2263  *
2264  * => must have proper locking on pmap_master_lock
2265  * => caller must hold pmap's lock
2266  * => PTP must be mapped into KVA
2267  * => PTP should be null if pmap == pmap_kernel()
2268  * => returns true if we removed a mapping
2269  */
2270 
2271 static boolean_t
pmap_remove_pte(pmap,ptp,pte,va)2272 pmap_remove_pte(pmap, ptp, pte, va)
2273 	struct pmap *pmap;
2274 	struct vm_page *ptp;
2275 	pt_entry_t *pte;
2276 	vaddr_t va;
2277 {
2278 	pt_entry_t opte;
2279 	int bank, off;
2280 	struct pv_entry *pve;
2281 
2282 	if (!pmap_valid_entry(*pte))
2283 		return(FALSE);		/* VA not mapped */
2284 
2285 	opte = *pte;			/* save the old PTE */
2286 	*pte = 0;			/* zap! */
2287 
2288 	pmap_exec_account(pmap, va, opte, 0);
2289 
2290 	if (opte & PG_W)
2291 		pmap->pm_stats.wired_count--;
2292 	pmap->pm_stats.resident_count--;
2293 
2294 	if (ptp)
2295 		ptp->wire_count--;		/* dropping a PTE */
2296 
2297 	if (pmap_is_curpmap(pmap))
2298 		pmap_update_pg(va);		/* flush TLB */
2299 
2300 	/*
2301 	 * if we are not on a pv_head list we are done.
2302 	 */
2303 
2304 	if ((opte & PG_PVLIST) == 0) {
2305 #ifdef DIAGNOSTIC
2306 		if (vm_physseg_find(i386_btop(opte & PG_FRAME), &off) != -1)
2307 			panic("pmap_remove_pte: managed page without "
2308 			      "PG_PVLIST for 0x%lx", va);
2309 #endif
2310 		return(TRUE);
2311 	}
2312 
2313 	bank = vm_physseg_find(i386_btop(opte & PG_FRAME), &off);
2314 #ifdef DIAGNOSTIC
2315 	if (bank == -1)
2316 		panic("pmap_remove_pte: unmanaged page marked "
2317 		    "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va,
2318 		    (u_long)(opte & PG_FRAME));
2319 #endif
2320 
2321 	/* sync R/M bits */
2322 	simple_lock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
2323 	vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));
2324 	pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va);
2325 	simple_unlock(&vm_physmem[bank].pmseg.pvhead[off].pvh_lock);
2326 
2327 	if (pve)
2328 		pmap_free_pv(pmap, pve);
2329 	return(TRUE);
2330 }
2331 
/*
 * pmap_remove: top level mapping removal function
 *
 * => removes the mappings in the VA range [sva, eva) from pmap
 * => caller should not be holding any pmap locks
 * => a single-page range takes a shortcut through pmap_remove_pte;
 *	larger ranges are processed one PTP-sized block at a time with
 *	pmap_remove_ptes
 * => a non-kernel PTP whose wire_count drops to 1 or less is freed
 */

void
pmap_remove(pmap, sva, eva)
	struct pmap *pmap;
	vaddr_t sva, eva;
{
	pt_entry_t *ptes;
	boolean_t result;
	paddr_t ptppa;
	vaddr_t blkendva;
	struct vm_page *ptp;
	struct pmap_remove_record pmap_rr, *prr;

	/*
	 * we lock in the pmap => pv_head direction
	 */

	PMAP_MAP_TO_HEAD_LOCK();
	ptes = pmap_map_ptes(pmap);	/* locks pmap */

	/*
	 * removing one page?  take shortcut function.
	 */

	if (sva + PAGE_SIZE == eva) {

		if (pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) {

			/* PA of the PTP */
			ptppa = pmap->pm_pdir[pdei(sva)] & PG_FRAME;

			/* get PTP if non-kernel mapping */

			if (pmap == pmap_kernel()) {
				/* we never free kernel PTPs */
				ptp = NULL;
			} else {
				/* try the cached hint before a full lookup */
				if (pmap->pm_ptphint &&
				    VM_PAGE_TO_PHYS(pmap->pm_ptphint) ==
				    ptppa) {
					ptp = pmap->pm_ptphint;
				} else {
					ptp = PHYS_TO_VM_PAGE(ptppa);
#ifdef DIAGNOSTIC
					if (ptp == NULL)
						panic("pmap_remove: unmanaged "
						      "PTP detected");
#endif
				}
			}

			/* do it! (pmap_remove_pte flushes the TLB itself) */
			result = pmap_remove_pte(pmap, ptp,
						 &ptes[i386_btop(sva)], sva);

			/*
			 * if mapping removed and the PTP is no longer
			 * being used, free it!
			 */

			if (result && ptp && ptp->wire_count <= 1) {
				pmap->pm_pdir[pdei(sva)] = 0;	/* zap! */
#if defined(I386_CPU)
				/* already dumped whole TLB on i386 */
				if (cpu_class != CPUCLASS_386)
#endif
				{
					/* flush the PTP's own mapping */
					pmap_update_pg(((vaddr_t) ptes) +
						       ptp->offset);
				}
				pmap->pm_stats.resident_count--;
				/* the hint must not point at a freed PTP */
				if (pmap->pm_ptphint == ptp)
					pmap->pm_ptphint =
					    TAILQ_FIRST(&pmap->pm_obj.memq);
				ptp->wire_count = 0;
				uvm_pagefree(ptp);
			}
		}

		pmap_unmap_ptes(pmap);		/* unlock pmap */
		PMAP_MAP_TO_HEAD_UNLOCK();
		return;
	}

	/*
	 * removing a range of pages: we unmap in PTP sized blocks (4MB)
	 *
	 * if we are the currently loaded pmap, we use prr to keep track
	 * of the VAs we unload so that we can flush them out of the tlb.
	 */

	if (pmap_is_curpmap(pmap)) {
		prr = &pmap_rr;
		prr->prr_npages = 0;
	} else {
		prr = NULL;
	}

	for (/* null */ ; sva < eva ; sva = blkendva) {

		/* determine range of block */
		blkendva = i386_round_pdr(sva+1);
		if (blkendva > eva)
			blkendva = eva;

		/*
		 * XXXCDC: our PTE mappings should never be removed
		 * with pmap_remove!  if we allow this (and why would
		 * we?) then we end up freeing the pmap's page
		 * directory page (PDP) before we are finished using
		 * it when we hit it in the recursive mapping.  this
		 * is BAD.
		 *
		 * long term solution is to move the PTEs out of user
		 * address space.  and into kernel address space (up
		 * with APTE).  then we can set VM_MAXUSER_ADDRESS to
		 * be VM_MAX_ADDRESS.
		 */

		if (pdei(sva) == PDSLOT_PTE)
			/* XXXCDC: ugly hack to avoid freeing PDP here */
			continue;

		if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
			/* valid block? */
			continue;

		/* PA of the PTP */
		ptppa = (pmap->pm_pdir[pdei(sva)] & PG_FRAME);

		/* get PTP if non-kernel mapping */
		if (pmap == pmap_kernel()) {
			/* we never free kernel PTPs */
			ptp = NULL;
		} else {
			/* try the cached hint before a full lookup */
			if (pmap->pm_ptphint &&
			    VM_PAGE_TO_PHYS(pmap->pm_ptphint) == ptppa) {
				ptp = pmap->pm_ptphint;
			} else {
				ptp = PHYS_TO_VM_PAGE(ptppa);
#ifdef DIAGNOSTIC
				if (ptp == NULL)
					panic("pmap_remove: unmanaged PTP "
					      "detected");
#endif
			}
		}
		/* remove this block's PTEs; removed VAs are logged in prr */
		pmap_remove_ptes(pmap, prr, ptp,
				 (vaddr_t)&ptes[i386_btop(sva)], sva, blkendva);

		/* if PTP is no longer being used, free it! */
		if (ptp && ptp->wire_count <= 1) {
			pmap->pm_pdir[pdei(sva)] = 0;	/* zap! */
			pmap_update_pg( ((vaddr_t) ptes) + ptp->offset);
#if defined(I386_CPU)
			/* cancel possible pending pmap update on i386 */
			if (cpu_class == CPUCLASS_386 && prr)
				prr->prr_npages = 0;
#endif
			pmap->pm_stats.resident_count--;
			if (pmap->pm_ptphint == ptp)	/* update hint? */
				pmap->pm_ptphint =
				    TAILQ_FIRST(&pmap->pm_obj.memq);
			ptp->wire_count = 0;
			uvm_pagefree(ptp);
		}
	}

	/*
	 * if we kept a removal record and removed some pages update the TLB
	 *
	 * a count above PMAP_RR_MAX means the record overflowed, so the
	 * whole TLB is flushed instead of individual pages.
	 */

	if (prr && prr->prr_npages) {
#if defined(I386_CPU)
		if (cpu_class == CPUCLASS_386) {
			tlbflush();
		} else
#endif
		{ /* not I386 */
			if (prr->prr_npages > PMAP_RR_MAX) {
				tlbflush();
			} else {
				while (prr->prr_npages) {
					pmap_update_pg(
					    prr->prr_vas[--prr->prr_npages]);
				}
			}
		} /* not I386 */
	}
	pmap_unmap_ptes(pmap);
	PMAP_MAP_TO_HEAD_UNLOCK();
}
2529 
/*
 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
 *
 * => we set pv_head => pmap locking
 * => R/M bits are sync'd back to attrs
 * => an unmanaged page only produces a diagnostic printf
 * => PTPs left without mappings are freed along the way
 */

void
pmap_page_remove(pg)
	struct vm_page *pg;
{
	int bank, off;
	struct pv_head *pvh;
	struct pv_entry *pve;
	pt_entry_t *ptes, opte;
#if defined(I386_CPU)
	boolean_t needs_update = FALSE;	/* deferred full TLB flush (386) */
#endif

	/* XXX: vm_page should either contain pv_head or have a pointer to it */
	bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
	if (bank == -1) {
		printf("pmap_page_remove: unmanaged page?\n");
		return;
	}

	/* nothing maps this page: done before taking any locks */
	pvh = &vm_physmem[bank].pmseg.pvhead[off];
	if (pvh->pvh_list == NULL) {
		return;
	}

	/* set pv_head => pmap locking */
	PMAP_HEAD_TO_MAP_LOCK();

	/* XXX: needed if we hold head->map lock? */
	simple_lock(&pvh->pvh_lock);

	/* walk every mapping of the page, zapping each PTE in turn */
	for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) {
		ptes = pmap_map_ptes(pve->pv_pmap);		/* locks pmap */

#ifdef DIAGNOSTIC
		if (pve->pv_va >= uvm.pager_sva && pve->pv_va < uvm.pager_eva) {
			printf("pmap_page_remove: found pager VA on pv_list\n");
		}
		/* the PDE must still point at the PTP recorded in the pve */
		if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
				    PG_FRAME)
		    != VM_PAGE_TO_PHYS(pve->pv_ptp)) {
			printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n",
			       pg, pve->pv_va, pve->pv_ptp);
			printf("pmap_page_remove: PTP's phys addr: "
			       "actual=%x, recorded=%lx\n",
			       (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] &
				PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp));
			panic("pmap_page_remove: mapped managed page has "
			      "invalid pv_ptp field");
		}
#endif

		opte = ptes[i386_btop(pve->pv_va)];
		ptes[i386_btop(pve->pv_va)] = 0;		/* zap! */

		if (opte & PG_W)
			pve->pv_pmap->pm_stats.wired_count--;
		pve->pv_pmap->pm_stats.resident_count--;

		/* only the currently loaded pmap can have this VA in the TLB */
		if (pmap_is_curpmap(pve->pv_pmap)) {
#if defined(I386_CPU)
			if (cpu_class == CPUCLASS_386)
				needs_update = TRUE;
			else
#endif
				pmap_update_pg(pve->pv_va);
		}

		/* sync R/M bits */
		vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M));

		/* update the PTP reference count.  free if last reference. */
		if (pve->pv_ptp) {
			pve->pv_ptp->wire_count--;
			if (pve->pv_ptp->wire_count <= 1) {
				/* zap! */
				pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] = 0;
				pmap_update_pg(((vaddr_t)ptes) +
					       pve->pv_ptp->offset);
#if defined(I386_CPU)
				/*
				 * NOTE(review): presumably the update above
				 * already dumps the whole TLB on a 386, making
				 * the deferred flush unnecessary -- confirm.
				 */
				needs_update = FALSE;
#endif
				pve->pv_pmap->pm_stats.resident_count--;
				/* update hint? */
				if (pve->pv_pmap->pm_ptphint == pve->pv_ptp)
					pve->pv_pmap->pm_ptphint =
					    TAILQ_FIRST(&pve->pv_pmap->pm_obj.memq);
				pve->pv_ptp->wire_count = 0;
				uvm_pagefree(pve->pv_ptp);
			}
		}
		pmap_unmap_ptes(pve->pv_pmap);		/* unlocks pmap */
	}
	/* every mapping is gone: release the whole pv list at once */
	pmap_free_pvs(NULL, pvh->pvh_list);
	pvh->pvh_list = NULL;
	simple_unlock(&pvh->pvh_lock);
	PMAP_HEAD_TO_MAP_UNLOCK();
#if defined(I386_CPU)
	if (needs_update)
		tlbflush();
#endif
}
2638 
2639 /*
2640  * p m a p   a t t r i b u t e  f u n c t i o n s
2641  * functions that test/change managed page's attributes
2642  * since a page can be mapped multiple times we must check each PTE that
2643  * maps it by going down the pv lists.
2644  */
2645 
2646 /*
2647  * pmap_test_attrs: test a page's attributes
2648  *
2649  * => we set pv_head => pmap locking
2650  */
2651 
2652 boolean_t
pmap_test_attrs(pg,testbits)2653 pmap_test_attrs(pg, testbits)
2654 	struct vm_page *pg;
2655 	int testbits;
2656 {
2657 	int bank, off;
2658 	char *myattrs;
2659 	struct pv_head *pvh;
2660 	struct pv_entry *pve;
2661 	pt_entry_t *ptes, pte;
2662 
2663 	/* XXX: vm_page should either contain pv_head or have a pointer to it */
2664 	bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
2665 	if (bank == -1) {
2666 		printf("pmap_test_attrs: unmanaged page?\n");
2667 		return(FALSE);
2668 	}
2669 
2670 	/*
2671 	 * before locking: see if attributes are already set and if so,
2672 	 * return!
2673 	 */
2674 
2675 	myattrs = &vm_physmem[bank].pmseg.attrs[off];
2676 	if (*myattrs & testbits)
2677 		return(TRUE);
2678 
2679 	/* test to see if there is a list before bothering to lock */
2680 	pvh = &vm_physmem[bank].pmseg.pvhead[off];
2681 	if (pvh->pvh_list == NULL) {
2682 		return(FALSE);
2683 	}
2684 
2685 	/* nope, gonna have to do it the hard way */
2686 	PMAP_HEAD_TO_MAP_LOCK();
2687 	/* XXX: needed if we hold head->map lock? */
2688 	simple_lock(&pvh->pvh_lock);
2689 
2690 	for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0;
2691 	     pve = pve->pv_next) {
2692 		ptes = pmap_map_ptes(pve->pv_pmap);
2693 		pte = ptes[i386_btop(pve->pv_va)];
2694 		pmap_unmap_ptes(pve->pv_pmap);
2695 		*myattrs |= pte;
2696 	}
2697 
2698 	/*
2699 	 * note that we will exit the for loop with a non-null pve if
2700 	 * we have found the bits we are testing for.
2701 	 */
2702 
2703 	simple_unlock(&pvh->pvh_lock);
2704 	PMAP_HEAD_TO_MAP_UNLOCK();
2705 	return((*myattrs & testbits) != 0);
2706 }
2707 
2708 /*
2709  * pmap_change_attrs: change a page's attributes
2710  *
2711  * => we set pv_head => pmap locking
2712  * => we return TRUE if we cleared one of the bits we were asked to
2713  */
2714 
2715 boolean_t
pmap_change_attrs(pg,setbits,clearbits)2716 pmap_change_attrs(pg, setbits, clearbits)
2717 	struct vm_page *pg;
2718 	int setbits, clearbits;
2719 {
2720 	u_int32_t result;
2721 	int bank, off;
2722 	struct pv_head *pvh;
2723 	struct pv_entry *pve;
2724 	pt_entry_t *ptes, npte;
2725 	char *myattrs;
2726 #if defined(I386_CPU)
2727 	boolean_t needs_update = FALSE;
2728 #endif
2729 
2730 	/* XXX: vm_page should either contain pv_head or have a pointer to it */
2731 	bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off);
2732 	if (bank == -1) {
2733 		printf("pmap_change_attrs: unmanaged page?\n");
2734 		return(FALSE);
2735 	}
2736 
2737 	PMAP_HEAD_TO_MAP_LOCK();
2738 	pvh = &vm_physmem[bank].pmseg.pvhead[off];
2739 	/* XXX: needed if we hold head->map lock? */
2740 	simple_lock(&pvh->pvh_lock);
2741 
2742 	myattrs = &vm_physmem[bank].pmseg.attrs[off];
2743 	result = *myattrs & clearbits;
2744 	*myattrs = (*myattrs | setbits) & ~clearbits;
2745 
2746 	for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) {
2747 #ifdef DIAGNOSTIC
2748 		if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)]))
2749 			panic("pmap_change_attrs: mapping without PTP "
2750 			      "detected");
2751 #endif
2752 
2753 		ptes = pmap_map_ptes(pve->pv_pmap);		/* locks pmap */
2754 		npte = ptes[i386_btop(pve->pv_va)];
2755 		result |= (npte & clearbits);
2756 		npte = (npte | setbits) & ~clearbits;
2757 		if (ptes[i386_btop(pve->pv_va)] != npte) {
2758 			ptes[i386_btop(pve->pv_va)] = npte;	/* zap! */
2759 
2760 			if (pmap_is_curpmap(pve->pv_pmap)) {
2761 #if defined(I386_CPU)
2762 				if (cpu_class == CPUCLASS_386)
2763 					needs_update = TRUE;
2764 				else
2765 #endif
2766 					pmap_update_pg(pve->pv_va);
2767 			}
2768 		}
2769 		pmap_unmap_ptes(pve->pv_pmap);		/* unlocks pmap */
2770 	}
2771 
2772 	simple_unlock(&pvh->pvh_lock);
2773 	PMAP_HEAD_TO_MAP_UNLOCK();
2774 
2775 #if defined(I386_CPU)
2776 	if (needs_update)
2777 		tlbflush();
2778 #endif
2779 	return(result != 0);
2780 }
2781 
2782 /*
2783  * p m a p   p r o t e c t i o n   f u n c t i o n s
2784  */
2785 
2786 /*
2787  * pmap_page_protect: change the protection of all recorded mappings
2788  *	of a managed page
2789  *
2790  * => NOTE: this is an inline function in pmap.h
2791  */
2792 
2793 /* see pmap.h */
2794 
2795 /*
 * pmap_protect: set the protection of the pages in a pmap
2797  *
2798  * => NOTE: this is an inline function in pmap.h
2799  */
2800 
2801 /* see pmap.h */
2802 
/*
 * pmap_write_protect: write-protect pages in a pmap
 *
 * => rewrites the protection bits (PG_PROT) of every valid PTE in the
 *	VA range [sva, eva) according to protection_codes[prot]
 * => sva and eva are truncated to page boundaries
 * => if pmap is the current pmap, the changed VAs are flushed from the
 *	TLB before returning
 */

void
pmap_write_protect(pmap, sva, eva, prot)
	struct pmap *pmap;
	vaddr_t sva, eva;
	vm_prot_t prot;
{
	pt_entry_t *ptes, *spte, *epte, npte;
	struct pmap_remove_record pmap_rr, *prr;
	vaddr_t blockend, va;
	u_int32_t md_prot;

	ptes = pmap_map_ptes(pmap);		/* locks pmap */

	/* need to worry about TLB? [TLB stores protection bits] */
	if (pmap_is_curpmap(pmap)) {
		prr = &pmap_rr;
		prr->prr_npages = 0;
	} else {
		prr = NULL;
	}

	/* should be ok, but just in case ... */
	sva &= PG_FRAME;
	eva &= PG_FRAME;

	/* walk the range one page-directory-sized block at a time */
	for (/* null */ ; sva < eva ; sva = blockend) {

		blockend = (sva & PD_MASK) + NBPD;
		if (blockend > eva)
			blockend = eva;

		/*
		 * XXXCDC: our PTE mappings should never be write-protected!
		 *
		 * long term solution is to move the PTEs out of user
		 * address space.  and into kernel address space (up
		 * with APTE).  then we can set VM_MAXUSER_ADDRESS to
		 * be VM_MAX_ADDRESS.
		 */

		/* XXXCDC: ugly hack: skip the recursive PTE mapping slot */
		if (pdei(sva) == PDSLOT_PTE)
			continue;

		/* empty block? */
		if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
			continue;

		/* machine-dependent protection bits for this block */
		md_prot = protection_codes[prot];
		if (sva < VM_MAXUSER_ADDRESS)
			md_prot |= PG_u;
		else if (sva < VM_MAX_ADDRESS)
			/* XXX: write-prot our PTES? never! */
			md_prot |= (PG_u | PG_RW);

		spte = &ptes[i386_btop(sva)];
		epte = &ptes[i386_btop(blockend)];

		for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) {

			if (!pmap_valid_entry(*spte))	/* no mapping? */
				continue;

			npte = (*spte & ~PG_PROT) | md_prot;

			/* leave PTEs that already have the right bits alone */
			if (npte != *spte) {
				pmap_exec_account(pmap, sva, *spte, npte);

				*spte = npte;		/* zap! */

				if (prr) {    /* worried about tlb flushing? */
					va = i386_ptob(spte - ptes);
					if (npte & PG_G) {
						/* PG_G requires this */
						pmap_update_pg(va);
					} else {
						if (prr->prr_npages <
						    PMAP_RR_MAX) {
							prr->prr_vas[
							    prr->prr_npages++] =
								va;
						} else {
						    if (prr->prr_npages ==
							PMAP_RR_MAX)
							/* signal an overflow */
							    prr->prr_npages++;
						}
					}
				}	/* if (prr) */
			}	/* npte != *spte */
		}	/* for loop */
	}

	/*
	 * if we kept a record and changed some pages update the TLB
	 *
	 * a count above PMAP_RR_MAX means the record overflowed, so the
	 * whole TLB is flushed instead of individual pages.
	 */

	if (prr && prr->prr_npages) {
#if defined(I386_CPU)
		if (cpu_class == CPUCLASS_386) {
			tlbflush();
		} else
#endif
		{ /* not I386 */
			if (prr->prr_npages > PMAP_RR_MAX) {
				tlbflush();
			} else {
				while (prr->prr_npages) {
					pmap_update_pg(prr->prr_vas[
						       --prr->prr_npages]);
				}
			}
		} /* not I386 */
	}
	pmap_unmap_ptes(pmap);		/* unlocks pmap */
}
2923 
2924 /*
2925  * end of protection functions
2926  */
2927 
2928 /*
2929  * pmap_unwire: clear the wired bit in the PTE
2930  *
2931  * => mapping should already be in map
2932  */
2933 
2934 void
pmap_unwire(pmap,va)2935 pmap_unwire(pmap, va)
2936 	struct pmap *pmap;
2937 	vaddr_t va;
2938 {
2939 	pt_entry_t *ptes;
2940 
2941 	if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) {
2942 		ptes = pmap_map_ptes(pmap);		/* locks pmap */
2943 
2944 #ifdef DIAGNOSTIC
2945 		if (!pmap_valid_entry(ptes[i386_btop(va)]))
2946 			panic("pmap_unwire: invalid (unmapped) va 0x%lx", va);
2947 #endif
2948 		if ((ptes[i386_btop(va)] & PG_W) != 0) {
2949 			ptes[i386_btop(va)] &= ~PG_W;
2950 			pmap->pm_stats.wired_count--;
2951 		}
2952 #ifdef DIAGNOSTIC
2953 		else {
2954 			printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2955 			       "didn't change!\n", pmap, va);
2956 		}
2957 #endif
2958 		pmap_unmap_ptes(pmap);		/* unlocks map */
2959 	}
2960 #ifdef DIAGNOSTIC
2961 	else {
2962 		panic("pmap_unwire: invalid PDE");
2963 	}
2964 #endif
2965 }
2966 
2967 /*
2968  * pmap_collect: free resources held by a pmap
2969  *
2970  * => optional function.
2971  * => called when a process is swapped out to free memory.
2972  */
2973 
2974 void
pmap_collect(pmap)2975 pmap_collect(pmap)
2976 	struct pmap *pmap;
2977 {
2978 	/*
2979 	 * free all of the pt pages by removing the physical mappings
2980 	 * for its entire address space.
2981 	 */
2982 
2983 	pmap_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS);
2984 }
2985 
2986 /*
2987  * pmap_copy: copy mappings from one pmap to another
2988  *
2989  * => optional function
2990  * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2991  */
2992 
2993 /*
2994  * defined as macro in pmap.h
2995  */
2996 
2997 /*
2998  * pmap_enter: enter a mapping into a pmap
2999  *
3000  * => must be done "now" ... no lazy-evaluation
3001  * => we set pmap => pv_head locking
3002  */
3003 
3004 int
pmap_enter(pmap,va,pa,prot,flags)3005 pmap_enter(pmap, va, pa, prot, flags)
3006 	struct pmap *pmap;
3007 	vaddr_t va;
3008 	paddr_t pa;
3009 	vm_prot_t prot;
3010 	int flags;
3011 {
3012 	pt_entry_t *ptes, opte, npte;
3013 	struct vm_page *ptp;
3014 	struct pv_head *pvh;
3015 	struct pv_entry *pve;
3016 	int bank, off, error;
3017 	boolean_t wired = (flags & PMAP_WIRED) != 0;
3018 
3019 #ifdef DIAGNOSTIC
3020 	/* sanity check: totally out of range? */
3021 	if (va >= VM_MAX_KERNEL_ADDRESS)
3022 		panic("pmap_enter: too big");
3023 
3024 	if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE)
3025 		panic("pmap_enter: trying to map over PDP/APDP!");
3026 
3027 	/* sanity check: kernel PTPs should already have been pre-allocated */
3028 	if (va >= VM_MIN_KERNEL_ADDRESS &&
3029 	    !pmap_valid_entry(pmap->pm_pdir[pdei(va)]))
3030 		panic("pmap_enter: missing kernel PTP!");
3031 #endif
3032 
3033 	/* get lock */
3034 	PMAP_MAP_TO_HEAD_LOCK();
3035 
3036 	/*
3037 	 * map in ptes and get a pointer to our PTP (unless we are the kernel)
3038 	 */
3039 
3040 	ptes = pmap_map_ptes(pmap);		/* locks pmap */
3041 	if (pmap == pmap_kernel()) {
3042 		ptp = NULL;
3043 	} else {
3044 		ptp = pmap_get_ptp(pmap, pdei(va), FALSE);
3045 		if (ptp == NULL) {
3046 			if (flags & PMAP_CANFAIL) {
3047 				error = KERN_RESOURCE_SHORTAGE;
3048 				goto out;
3049 			}
3050 			panic("pmap_enter: get ptp failed");
3051 		}
3052 	}
3053 	opte = ptes[i386_btop(va)];		/* old PTE */
3054 
3055 	/*
3056 	 * is there currently a valid mapping at our VA?
3057 	 */
3058 
3059 	if (pmap_valid_entry(opte)) {
3060 
3061 		/*
3062 		 * first, update pm_stats.  resident count will not
3063 		 * change since we are replacing/changing a valid
3064 		 * mapping.  wired count might change...
3065 		 */
3066 
3067 		if (wired && (opte & PG_W) == 0)
3068 			pmap->pm_stats.wired_count++;
3069 		else if (!wired && (opte & PG_W) != 0)
3070 			pmap->pm_stats.wired_count--;
3071 
3072 		/*
3073 		 * is the currently mapped PA the same as the one we
3074 		 * want to map?
3075 		 */
3076 
3077 		if ((opte & PG_FRAME) == pa) {
3078 
3079 			/* if this is on the PVLIST, sync R/M bit */
3080 			if (opte & PG_PVLIST) {
3081 				bank = vm_physseg_find(atop(pa), &off);
3082 #ifdef DIAGNOSTIC
3083 				if (bank == -1)
3084 					panic("pmap_enter: same pa PG_PVLIST "
3085 					      "mapping with unmanaged page "
3086 					      "pa = 0x%lx (0x%lx)", pa,
3087 					      atop(pa));
3088 #endif
3089 				pvh = &vm_physmem[bank].pmseg.pvhead[off];
3090 				simple_lock(&pvh->pvh_lock);
3091 				vm_physmem[bank].pmseg.attrs[off] |= opte;
3092 				simple_unlock(&pvh->pvh_lock);
3093 			} else {
3094 				pvh = NULL;	/* ensure !PG_PVLIST */
3095 			}
3096 			goto enter_now;
3097 		}
3098 
3099 		/*
3100 		 * changing PAs: we must remove the old one first
3101 		 */
3102 
3103 		/*
3104 		 * if current mapping is on a pvlist,
3105 		 * remove it (sync R/M bits)
3106 		 */
3107 
3108 		if (opte & PG_PVLIST) {
3109 			bank = vm_physseg_find(atop(opte & PG_FRAME), &off);
3110 #ifdef DIAGNOSTIC
3111 			if (bank == -1)
3112 				panic("pmap_enter: PG_PVLIST mapping with "
3113 				      "unmanaged page "
3114 				      "pa = 0x%lx (0x%lx)", pa, atop(pa));
3115 #endif
3116 			pvh = &vm_physmem[bank].pmseg.pvhead[off];
3117 			simple_lock(&pvh->pvh_lock);
3118 			pve = pmap_remove_pv(pvh, pmap, va);
3119 			vm_physmem[bank].pmseg.attrs[off] |= opte;
3120 			simple_unlock(&pvh->pvh_lock);
3121 		} else {
3122 			pve = NULL;
3123 		}
3124 	} else {	/* opte not valid */
3125 		pve = NULL;
3126 		pmap->pm_stats.resident_count++;
3127 		if (wired)
3128 			pmap->pm_stats.wired_count++;
3129 		if (ptp)
3130 			ptp->wire_count++;      /* count # of valid entrys */
3131 	}
3132 
3133 	/*
3134 	 * at this point pm_stats has been updated.   pve is either NULL
3135 	 * or points to a now-free pv_entry structure (the latter case is
3136 	 * if we called pmap_remove_pv above).
3137 	 *
3138 	 * if this entry is to be on a pvlist, enter it now.
3139 	 */
3140 
3141 	bank = vm_physseg_find(atop(pa), &off);
3142 	if (pmap_initialized && bank != -1) {
3143 		pvh = &vm_physmem[bank].pmseg.pvhead[off];
3144 		if (pve == NULL) {
3145 			pve = pmap_alloc_pv(pmap, ALLOCPV_NEED);
3146 			if (pve == NULL) {
3147 				if (flags & PMAP_CANFAIL) {
3148 					error = KERN_RESOURCE_SHORTAGE;
3149 					goto out;
3150 				}
3151 				panic("pmap_enter: no pv entries available");
3152 			}
3153 		}
3154 		/* lock pvh when adding */
3155 		pmap_enter_pv(pvh, pve, pmap, va, ptp);
3156 	} else {
3157 
3158 		/* new mapping is not PG_PVLIST.   free pve if we've got one */
3159 		pvh = NULL;		/* ensure !PG_PVLIST */
3160 		if (pve)
3161 			pmap_free_pv(pmap, pve);
3162 	}
3163 
3164 enter_now:
3165 	/*
3166 	 * at this point pvh is !NULL if we want the PG_PVLIST bit set
3167 	 */
3168 
3169 	npte = pa | protection_codes[prot] | PG_V;
3170 	pmap_exec_account(pmap, va, opte, npte);
3171 	if (pvh)
3172 		npte |= PG_PVLIST;
3173 	if (wired)
3174 		npte |= PG_W;
3175 	if (va < VM_MAXUSER_ADDRESS)
3176 		npte |= PG_u;
3177 	else if (va < VM_MAX_ADDRESS)
3178 		npte |= (PG_u | PG_RW);	/* XXXCDC: no longer needed? */
3179 	if (pmap == pmap_kernel())
3180 		npte |= pmap_pg_g;
3181 
3182 	ptes[i386_btop(va)] = npte;		/* zap! */
3183 
3184 	if ((opte & ~(PG_M|PG_U)) != npte && pmap_is_curpmap(pmap))
3185 		pmap_update_pg(va);
3186 
3187 	error = 0;
3188 
3189 out:
3190 	pmap_unmap_ptes(pmap);
3191 	PMAP_MAP_TO_HEAD_UNLOCK();
3192 
3193 	return error;
3194 }
3195 
3196 /*
3197  * pmap_growkernel: increase usage of KVM space
3198  *
3199  * => we allocate new PTPs for the kernel and install them in all
3200  *	the pmaps on the system.
3201  */
3202 
3203 vaddr_t
pmap_growkernel(maxkvaddr)3204 pmap_growkernel(maxkvaddr)
3205 	vaddr_t maxkvaddr;
3206 {
3207 	struct pmap *kpm = pmap_kernel(), *pm;
3208 	int needed_kpde;   /* needed number of kernel PTPs */
3209 	int s;
3210 	paddr_t ptaddr;
3211 
3212 	needed_kpde = (int)(maxkvaddr - VM_MIN_KERNEL_ADDRESS + (NBPD-1))
3213 		/ NBPD;
3214 	if (needed_kpde <= nkpde)
3215 		goto out;		/* we are OK */
3216 
3217 	/*
3218 	 * whoops!   we need to add kernel PTPs
3219 	 */
3220 
3221 	s = splhigh();	/* to be safe */
3222 	simple_lock(&kpm->pm_obj.vmobjlock);
3223 
3224 	for (/*null*/ ; nkpde < needed_kpde ; nkpde++) {
3225 
3226 		if (uvm.page_init_done == FALSE) {
3227 
3228 			/*
3229 			 * we're growing the kernel pmap early (from
3230 			 * uvm_pageboot_alloc()).  this case must be
3231 			 * handled a little differently.
3232 			 */
3233 
3234 			if (uvm_page_physget(&ptaddr) == FALSE)
3235 				panic("pmap_growkernel: out of memory");
3236 			pmap_zero_phys(ptaddr);
3237 
3238 			kpm->pm_pdir[PDSLOT_KERN + nkpde] =
3239 				ptaddr | PG_RW | PG_V;
3240 
3241 			/* count PTP as resident */
3242 			kpm->pm_stats.resident_count++;
3243 			continue;
3244 		}
3245 
3246 		/*
3247 		 * THIS *MUST* BE CODED SO AS TO WORK IN THE
3248 		 * pmap_initialized == FALSE CASE!  WE MAY BE
3249 		 * INVOKED WHILE pmap_init() IS RUNNING!
3250 		 */
3251 
3252 		if (pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE) == NULL) {
3253 			panic("pmap_growkernel: alloc ptp failed");
3254 		}
3255 
3256 		/* PG_u not for kernel */
3257 		kpm->pm_pdir[PDSLOT_KERN + nkpde] &= ~PG_u;
3258 
3259 		/* distribute new kernel PTP to all active pmaps */
3260 		simple_lock(&pmaps_lock);
3261 		for (pm = pmaps.lh_first; pm != NULL;
3262 		     pm = pm->pm_list.le_next) {
3263 			pm->pm_pdir[PDSLOT_KERN + nkpde] =
3264 				kpm->pm_pdir[PDSLOT_KERN + nkpde];
3265 		}
3266 		simple_unlock(&pmaps_lock);
3267 	}
3268 
3269 	simple_unlock(&kpm->pm_obj.vmobjlock);
3270 	splx(s);
3271 
3272 out:
3273 	return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD));
3274 }
3275 
3276 #ifdef DEBUG
3277 void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
3278 
3279 /*
3280  * pmap_dump: dump all the mappings from a pmap
3281  *
3282  * => caller should not be holding any pmap locks
3283  */
3284 
3285 void
pmap_dump(pmap,sva,eva)3286 pmap_dump(pmap, sva, eva)
3287 	struct pmap *pmap;
3288 	vaddr_t sva, eva;
3289 {
3290 	pt_entry_t *ptes, *pte;
3291 	vaddr_t blkendva;
3292 
3293 	/*
3294 	 * if end is out of range truncate.
3295 	 * if (end == start) update to max.
3296 	 */
3297 
3298 	if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
3299 		eva = VM_MAXUSER_ADDRESS;
3300 
3301 	/*
3302 	 * we lock in the pmap => pv_head direction
3303 	 */
3304 
3305 	PMAP_MAP_TO_HEAD_LOCK();
3306 	ptes = pmap_map_ptes(pmap);	/* locks pmap */
3307 
3308 	/*
3309 	 * dumping a range of pages: we dump in PTP sized blocks (4MB)
3310 	 */
3311 
3312 	for (/* null */ ; sva < eva ; sva = blkendva) {
3313 
3314 		/* determine range of block */
3315 		blkendva = i386_round_pdr(sva+1);
3316 		if (blkendva > eva)
3317 			blkendva = eva;
3318 
3319 		/* valid block? */
3320 		if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)]))
3321 			continue;
3322 
3323 		pte = &ptes[i386_btop(sva)];
3324 		for (/* null */; sva < blkendva ; sva += NBPG, pte++) {
3325 			if (!pmap_valid_entry(*pte))
3326 				continue;
3327 			printf("va %#lx -> pa %#x (pte=%#x)\n",
3328 			       sva, *pte, *pte & PG_FRAME);
3329 		}
3330 	}
3331 	pmap_unmap_ptes(pmap);
3332 	PMAP_MAP_TO_HEAD_UNLOCK();
3333 }
3334 #endif
3335