1 /*	$OpenBSD: uvm_amap.c,v 1.27 2002/05/09 14:14:18 provos Exp $	*/
2 /*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/
3 
4 /*
5  *
6  * Copyright (c) 1997 Charles D. Cranor and Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * uvm_amap.c: amap operations
38  */
39 
40 /*
41  * this file contains functions that perform operations on amaps.  see
42  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
43  */
44 
45 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/proc.h>
50 #include <sys/malloc.h>
51 #include <sys/kernel.h>
52 #include <sys/pool.h>
53 
54 #define UVM_AMAP_C		/* ensure disabled inlines are in */
55 #include <uvm/uvm.h>
56 #include <uvm/uvm_swap.h>
57 
/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
65 
66 struct pool uvm_amap_pool;
67 
68 /*
69  * local functions
70  */
71 
72 static struct vm_amap *amap_alloc1(int, int, int);
73 
74 #ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * map (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array has one slot per page and is divided into "chunks" of
 * consecutive slots that share a reference count.   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this lets a single int hold the ref count for the whole chunk.
 * note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (we need a way to tell
 * if we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */
107 
static __inline void pp_getreflen(const int *ppref, int offset, int *refp,
    int *lenp);
static __inline void pp_setreflen(int *, int, int, int);

/*
 * pp_getreflen: decode the reference count and length of the chunk that
 * starts at a specific offset of a ppref array
 *
 * => ppref's amap must be locked
 * => "offset" is expected to be the first slot of a chunk (the other
 *    slots of a chunk hold its length or don't-care values)
 * => *refp receives the chunk's reference count, *lenp its length in slots
 * => ppref is only read, never written
 */
static __inline void
pp_getreflen(const int *ppref, int offset, int *refp, int *lenp)
{
	const int head = ppref[offset];

	if (head > 0) {
		/* positive head: single-slot chunk storing (refs + 1) */
		*refp = head - 1;
		*lenp = 1;
	} else {
		/* otherwise: head is -(refs + 1), next slot is the length */
		*refp = -head - 1;
		*lenp = ppref[offset + 1];
	}
}
129 
/*
 * pp_setreflen: encode a chunk's reference count and length at a specific
 * offset of a ppref array
 *
 * => ppref's amap must be locked
 * => a chunk of length 1 uses only ppref[offset]; any other length also
 *    writes the length into ppref[offset + 1]
 */
static __inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	/* counts are stored biased by one so that zero refs is encodable */
	const int biased = ref + 1;

	if (len != 1) {
		ppref[offset] = -biased;	/* negative => multi-slot chunk */
		ppref[offset + 1] = len;
		return;
	}
	ppref[offset] = biased;
}
146 #endif
147 
148 /*
149  * amap_init: called at boot time to init global amap data structures
150  */
151 
152 void
amap_init()153 amap_init()
154 
155 {
156 	/*
157 	 * Initialize the vm_amap pool.
158 	 */
159 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
160 	    "amappl", &pool_allocator_nointr);
161 }
162 
163 /*
164  * amap_alloc1: internal function that allocates an amap, but does not
165  *	init the overlay.
166  *
167  * => lock on returned amap is init'd
168  */
169 static inline struct vm_amap *
amap_alloc1(slots,padslots,waitf)170 amap_alloc1(slots, padslots, waitf)
171 	int slots, padslots, waitf;
172 {
173 	struct vm_amap *amap;
174 	int totalslots;
175 
176 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
177 	if (amap == NULL)
178 		return(NULL);
179 
180 	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
181 	    sizeof(int);
182 	simple_lock_init(&amap->am_l);
183 	amap->am_ref = 1;
184 	amap->am_flags = 0;
185 #ifdef UVM_AMAP_PPREF
186 	amap->am_ppref = NULL;
187 #endif
188 	amap->am_maxslot = totalslots;
189 	amap->am_nslot = slots;
190 	amap->am_nused = 0;
191 
192 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
193 	    waitf);
194 	if (amap->am_slots == NULL)
195 		goto fail1;
196 
197 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
198 	if (amap->am_bckptr == NULL)
199 		goto fail2;
200 
201 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
202 	    M_UVMAMAP, waitf);
203 	if (amap->am_anon == NULL)
204 		goto fail3;
205 
206 	return(amap);
207 
208 fail3:
209 	free(amap->am_bckptr, M_UVMAMAP);
210 fail2:
211 	free(amap->am_slots, M_UVMAMAP);
212 fail1:
213 	pool_put(&uvm_amap_pool, amap);
214 	return (NULL);
215 }
216 
217 /*
218  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
219  *
220  * => caller should ensure sz is a multiple of PAGE_SIZE
221  * => reference count to new amap is set to one
222  * => new amap is returned unlocked
223  */
224 
225 struct vm_amap *
amap_alloc(sz,padsz,waitf)226 amap_alloc(sz, padsz, waitf)
227 	vaddr_t sz, padsz;
228 	int waitf;
229 {
230 	struct vm_amap *amap;
231 	int slots, padslots;
232 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
233 
234 	AMAP_B2SLOT(slots, sz);		/* load slots */
235 	AMAP_B2SLOT(padslots, padsz);
236 
237 	amap = amap_alloc1(slots, padslots, waitf);
238 	if (amap)
239 		memset(amap->am_anon, 0,
240 		    amap->am_maxslot * sizeof(struct vm_anon *));
241 
242 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
243 	return(amap);
244 }
245 
246 
247 /*
248  * amap_free: free an amap
249  *
250  * => the amap must be locked (mainly for simplelock accounting)
251  * => the amap should have a zero reference count and be empty
252  */
253 void
amap_free(amap)254 amap_free(amap)
255 	struct vm_amap *amap;
256 {
257 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
258 
259 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
260 
261 	free(amap->am_slots, M_UVMAMAP);
262 	free(amap->am_bckptr, M_UVMAMAP);
263 	free(amap->am_anon, M_UVMAMAP);
264 #ifdef UVM_AMAP_PPREF
265 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
266 		free(amap->am_ppref, M_UVMAMAP);
267 #endif
268 	amap_unlock(amap);	/* mainly for lock debugging */
269 	pool_put(&uvm_amap_pool, amap);
270 
271 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
272 }
273 
274 /*
275  * amap_extend: extend the size of an amap (if needed)
276  *
277  * => called from uvm_map when we want to extend an amap to cover
278  *    a new mapping (rather than allocate a new one)
279  * => amap should be unlocked (we will lock it)
280  * => to safely extend an amap it should have a reference count of
281  *    one (thus it can't be shared)
282  * => XXXCDC: needs a waitflag or failure return value?
283  * => XXXCDC: support padding at this level?
284  */
285 void
amap_extend(entry,addsize)286 amap_extend(entry, addsize)
287 	vm_map_entry_t entry;
288 	vsize_t addsize;
289 {
290 	struct vm_amap *amap = entry->aref.ar_amap;
291 	int slotoff = entry->aref.ar_pageoff;
292 	int slotmapped, slotadd, slotneed, slotalloc;
293 #ifdef UVM_AMAP_PPREF
294 	int *newppref, *oldppref;
295 #endif
296 	u_int *newsl, *newbck, *oldsl, *oldbck;
297 	struct vm_anon **newover, **oldover;
298 	int slotadded;
299 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
300 
301 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x)", entry,addsize,0,0);
302 
303 	/*
304 	 * first, determine how many slots we need in the amap.  don't
305 	 * forget that ar_pageoff could be non-zero: this means that
306 	 * there are some unused slots before us in the amap.
307 	 */
308 
309 	amap_lock(amap);					/* lock! */
310 
311 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
312 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
313 	slotneed = slotoff + slotmapped + slotadd;
314 
315 	/*
316 	 * case 1: we already have enough slots in the map and thus
317 	 * only need to bump the reference counts on the slots we are
318 	 * adding.
319 	 */
320 
321 	if (amap->am_nslot >= slotneed) {
322 #ifdef UVM_AMAP_PPREF
323 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
324 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
325 		}
326 #endif
327 		amap_unlock(amap);
328 		UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d",
329 		    amap, slotneed, 0, 0);
330 		return;				/* done! */
331 	}
332 
333 	/*
334 	 * case 2: we pre-allocated slots for use and we just need to
335 	 * bump nslot up to take account for these slots.
336 	 */
337 	if (amap->am_maxslot >= slotneed) {
338 #ifdef UVM_AMAP_PPREF
339 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
340 			if ((slotoff + slotmapped) < amap->am_nslot)
341 				amap_pp_adjref(amap, slotoff + slotmapped,
342 				    (amap->am_nslot - (slotoff + slotmapped)),
343 				    1);
344 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
345 			   slotneed - amap->am_nslot);
346 		}
347 #endif
348 		amap->am_nslot = slotneed;
349 		amap_unlock(amap);
350 		/*
351 		 * no need to zero am_anon since that was done at
352 		 * alloc time and we never shrink an allocation.
353 		 */
354 		UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d",
355 		    amap, slotneed, 0, 0);
356 		return;
357 	}
358 
359 	/*
360 	 * case 3: we need to malloc a new amap and copy all the amap
361 	 * data over from old amap to the new one.
362 	 *
363 	 * XXXCDC: could we take advantage of a kernel realloc()?
364 	 */
365 
366 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
367 	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
368 #ifdef UVM_AMAP_PPREF
369 	newppref = NULL;
370 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
371 		newppref = malloc(slotalloc *sizeof(int), M_UVMAMAP, M_NOWAIT);
372 		if (newppref == NULL) {
373 			/* give up if malloc fails */
374 			free(amap->am_ppref, M_UVMAMAP);
375 			amap->am_ppref = PPREF_NONE;
376 		}
377 	}
378 #endif
379 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK);
380 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK);
381 	newover = malloc(slotalloc * sizeof(struct vm_anon *),
382 	    M_UVMAMAP, M_WAITOK);
383 	amap_lock(amap);			/* re-lock! */
384 	KASSERT(amap->am_maxslot < slotneed);
385 
386 	/*
387 	 * now copy everything over to new malloc'd areas...
388 	 */
389 
390 	slotadded = slotalloc - amap->am_nslot;
391 
392 	/* do am_slots */
393 	oldsl = amap->am_slots;
394 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
395 	amap->am_slots = newsl;
396 
397 	/* do am_anon */
398 	oldover = amap->am_anon;
399 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
400 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) * slotadded);
401 	amap->am_anon = newover;
402 
403 	/* do am_bckptr */
404 	oldbck = amap->am_bckptr;
405 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
406 	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
407 	amap->am_bckptr = newbck;
408 
409 #ifdef UVM_AMAP_PPREF
410 	/* do ppref */
411 	oldppref = amap->am_ppref;
412 	if (newppref) {
413 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
414 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
415 		amap->am_ppref = newppref;
416 		if ((slotoff + slotmapped) < amap->am_nslot)
417 			amap_pp_adjref(amap, slotoff + slotmapped,
418 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
419 		pp_setreflen(newppref, amap->am_nslot, 1,
420 		    slotneed - amap->am_nslot);
421 	}
422 #endif
423 
424 	/* update master values */
425 	amap->am_nslot = slotneed;
426 	amap->am_maxslot = slotalloc;
427 
428 	/* unlock */
429 	amap_unlock(amap);
430 
431 	/* and free */
432 	free(oldsl, M_UVMAMAP);
433 	free(oldbck, M_UVMAMAP);
434 	free(oldover, M_UVMAMAP);
435 #ifdef UVM_AMAP_PPREF
436 	if (oldppref && oldppref != PPREF_NONE)
437 		free(oldppref, M_UVMAMAP);
438 #endif
439 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
440 	    amap, slotneed, 0, 0);
441 }
442 
443 /*
444  * amap_share_protect: change protection of anons in a shared amap
445  *
446  * for shared amaps, given the current data structure layout, it is
447  * not possible for us to directly locate all maps referencing the
448  * shared anon (to change the protection).  in order to protect data
449  * in shared maps we use pmap_page_protect().  [this is useful for IPC
450  * mechanisms like map entry passing that may want to write-protect
451  * all mappings of a shared amap.]  we traverse am_anon or am_slots
452  * depending on the current state of the amap.
453  *
454  * => entry's map and amap must be locked by the caller
455  */
456 void
amap_share_protect(entry,prot)457 amap_share_protect(entry, prot)
458 	vm_map_entry_t entry;
459 	vm_prot_t prot;
460 {
461 	struct vm_amap *amap = entry->aref.ar_amap;
462 	int slots, lcv, slot, stop;
463 
464 	AMAP_B2SLOT(slots, (entry->end - entry->start));
465 	stop = entry->aref.ar_pageoff + slots;
466 
467 	if (slots < amap->am_nused) {
468 		/* cheaper to traverse am_anon */
469 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
470 			if (amap->am_anon[lcv] == NULL)
471 				continue;
472 			if (amap->am_anon[lcv]->u.an_page != NULL)
473 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
474 						  prot);
475 		}
476 		return;
477 	}
478 
479 	/* cheaper to traverse am_slots */
480 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
481 		slot = amap->am_slots[lcv];
482 		if (slot < entry->aref.ar_pageoff || slot >= stop)
483 			continue;
484 		if (amap->am_anon[slot]->u.an_page != NULL)
485 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
486 	}
487 	return;
488 }
489 
490 /*
491  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
492  *
493  * => called from amap_unref when the final reference to an amap is
494  *	discarded (i.e. when reference count == 1)
495  * => the amap should be locked (by the caller)
496  */
497 
498 void
amap_wipeout(amap)499 amap_wipeout(amap)
500 	struct vm_amap *amap;
501 {
502 	int lcv, slot;
503 	struct vm_anon *anon;
504 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
505 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
506 
507 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
508 		int refs;
509 
510 		slot = amap->am_slots[lcv];
511 		anon = amap->am_anon[slot];
512 
513 		if (anon == NULL || anon->an_ref == 0)
514 			panic("amap_wipeout: corrupt amap");
515 
516 		simple_lock(&anon->an_lock); /* lock anon */
517 
518 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
519 		    anon->an_ref, 0, 0);
520 
521 		refs = --anon->an_ref;
522 		simple_unlock(&anon->an_lock);
523 		if (refs == 0) {
524 			/*
525 			 * we had the last reference to a vm_anon. free it.
526 			 */
527 			uvm_anfree(anon);
528 		}
529 	}
530 
531 	/*
532 	 * now we free the map
533 	 */
534 
535 	amap->am_ref = 0;	/* ... was one */
536 	amap->am_nused = 0;
537 	amap_free(amap);	/* will unlock and free amap */
538 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
539 }
540 
541 /*
542  * amap_copy: ensure that a map entry's "needs_copy" flag is false
543  *	by copying the amap if necessary.
544  *
545  * => an entry with a null amap pointer will get a new (blank) one.
546  * => the map that the map entry belongs to must be locked by caller.
547  * => the amap currently attached to "entry" (if any) must be unlocked.
548  * => if canchunk is true, then we may clip the entry into a chunk
549  * => "startva" and "endva" are used only if canchunk is true.  they are
550  *     used to limit chunking (e.g. if you have a large space that you
551  *     know you are going to need to allocate amaps for, there is no point
552  *     in allowing that to be chunked)
553  */
554 
555 void
amap_copy(map,entry,waitf,canchunk,startva,endva)556 amap_copy(map, entry, waitf, canchunk, startva, endva)
557 	vm_map_t map;
558 	vm_map_entry_t entry;
559 	int waitf;
560 	boolean_t canchunk;
561 	vaddr_t startva, endva;
562 {
563 	struct vm_amap *amap, *srcamap;
564 	int slots, lcv;
565 	vaddr_t chunksize;
566 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
567 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
568 		    map, entry, waitf, 0);
569 
570 	/*
571 	 * is there a map to copy?   if not, create one from scratch.
572 	 */
573 
574 	if (entry->aref.ar_amap == NULL) {
575 
576 		/*
577 		 * check to see if we have a large amap that we can
578 		 * chunk.  we align startva/endva to chunk-sized
579 		 * boundaries and then clip to them.
580 		 */
581 
582 		if (canchunk && atop(entry->end - entry->start) >=
583 		    UVM_AMAP_LARGE) {
584 			/* convert slots to bytes */
585 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
586 			startva = (startva / chunksize) * chunksize;
587 			endva = roundup(endva, chunksize);
588 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
589 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
590 			    endva);
591 			UVM_MAP_CLIP_START(map, entry, startva);
592 			/* watch out for endva wrap-around! */
593 			if (endva >= startva)
594 				UVM_MAP_CLIP_END(map, entry, endva);
595 		}
596 
597 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
598 		entry->start, entry->end, 0, 0);
599 		entry->aref.ar_pageoff = 0;
600 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
601 		    waitf);
602 		if (entry->aref.ar_amap != NULL)
603 			entry->etype &= ~UVM_ET_NEEDSCOPY;
604 		return;
605 	}
606 
607 	/*
608 	 * first check and see if we are the only map entry
609 	 * referencing the amap we currently have.  if so, then we can
610 	 * just take it over rather than copying it.  note that we are
611 	 * reading am_ref with the amap unlocked... the value can only
612 	 * be one if we have the only reference to the amap (via our
613 	 * locked map).  if we are greater than one we fall through to
614 	 * the next case (where we double check the value).
615 	 */
616 
617 	if (entry->aref.ar_amap->am_ref == 1) {
618 		entry->etype &= ~UVM_ET_NEEDSCOPY;
619 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
620 		    0, 0, 0, 0);
621 		return;
622 	}
623 
624 	/*
625 	 * looks like we need to copy the map.
626 	 */
627 
628 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
629 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
630 	AMAP_B2SLOT(slots, entry->end - entry->start);
631 	amap = amap_alloc1(slots, 0, waitf);
632 	if (amap == NULL) {
633 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
634 		return;
635 	}
636 	srcamap = entry->aref.ar_amap;
637 	amap_lock(srcamap);
638 
639 	/*
640 	 * need to double check reference count now that we've got the
641 	 * src amap locked down.  the reference count could have
642 	 * changed while we were in malloc.  if the reference count
643 	 * dropped down to one we take over the old map rather than
644 	 * copying the amap.
645 	 */
646 
647 	if (srcamap->am_ref == 1) {		/* take it over? */
648 		entry->etype &= ~UVM_ET_NEEDSCOPY;
649 		amap->am_ref--;		/* drop final reference to map */
650 		amap_free(amap);	/* dispose of new (unused) amap */
651 		amap_unlock(srcamap);
652 		return;
653 	}
654 
655 	/*
656 	 * we must copy it now.
657 	 */
658 
659 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
660 	for (lcv = 0 ; lcv < slots; lcv++) {
661 		amap->am_anon[lcv] =
662 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
663 		if (amap->am_anon[lcv] == NULL)
664 			continue;
665 		simple_lock(&amap->am_anon[lcv]->an_lock);
666 		amap->am_anon[lcv]->an_ref++;
667 		simple_unlock(&amap->am_anon[lcv]->an_lock);
668 		amap->am_bckptr[lcv] = amap->am_nused;
669 		amap->am_slots[amap->am_nused] = lcv;
670 		amap->am_nused++;
671 	}
672 	memset(&amap->am_anon[lcv], 0,
673 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
674 
675 	/*
676 	 * drop our reference to the old amap (srcamap) and unlock.
677 	 * we know that the reference count on srcamap is greater than
678 	 * one (we checked above), so there is no way we could drop
679 	 * the count to zero.  [and no need to worry about freeing it]
680 	 */
681 
682 	srcamap->am_ref--;
683 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
684 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
685 #ifdef UVM_AMAP_PPREF
686 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
687 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
688 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
689 	}
690 #endif
691 
692 	amap_unlock(srcamap);
693 
694 	/*
695 	 * install new amap.
696 	 */
697 
698 	entry->aref.ar_pageoff = 0;
699 	entry->aref.ar_amap = amap;
700 	entry->etype &= ~UVM_ET_NEEDSCOPY;
701 
702 	/*
703 	 * done!
704 	 */
705 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
706 }
707 
708 /*
709  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
710  *
711  *	called during fork(2) when the parent process has a wired map
712  *	entry.   in that case we want to avoid write-protecting pages
713  *	in the parent's map (e.g. like what you'd do for a COW page)
714  *	so we resolve the COW here.
715  *
716  * => assume parent's entry was wired, thus all pages are resident.
717  * => assume pages that are loaned out (loan_count) are already mapped
718  *	read-only in all maps, and thus no need for us to worry about them
719  * => assume both parent and child vm_map's are locked
720  * => caller passes child's map/entry in to us
721  * => if we run out of memory we will unlock the amap and sleep _with_ the
722  *	parent and child vm_map's locked(!).    we have to do this since
723  *	we are in the middle of a fork(2) and we can't let the parent
724  *	map change until we are done copying all the map entrys.
725  * => XXXCDC: out of memory should cause fork to fail, but there is
726  *	currently no easy way to do this (needs fix)
727  * => page queues must be unlocked (we may lock them)
728  */
729 
730 void
amap_cow_now(map,entry)731 amap_cow_now(map, entry)
732 	struct vm_map *map;
733 	struct vm_map_entry *entry;
734 {
735 	struct vm_amap *amap = entry->aref.ar_amap;
736 	int lcv, slot;
737 	struct vm_anon *anon, *nanon;
738 	struct vm_page *pg, *npg;
739 
740 	/*
741 	 * note that if we unlock the amap then we must ReStart the "lcv" for
742 	 * loop because some other process could reorder the anon's in the
743 	 * am_anon[] array on us while the lock is dropped.
744 	 */
745 ReStart:
746 	amap_lock(amap);
747 
748 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
749 
750 		/*
751 		 * get the page
752 		 */
753 
754 		slot = amap->am_slots[lcv];
755 		anon = amap->am_anon[slot];
756 		simple_lock(&anon->an_lock);
757 		pg = anon->u.an_page;
758 
759 		/*
760 		 * page must be resident since parent is wired
761 		 */
762 
763 		if (pg == NULL)
764 		    panic("amap_cow_now: non-resident wired page in anon %p",
765 			anon);
766 
767 		/*
768 		 * if the anon ref count is one and the page is not loaned,
769 		 * then we are safe (the child has exclusive access to the
770 		 * page).  if the page is loaned, then it must already be
771 		 * mapped read-only.
772 		 *
773 		 * we only need to get involved when these are not true.
774 		 * [note: if loan_count == 0, then the anon must own the page]
775 		 */
776 
777 		if (anon->an_ref > 1 && pg->loan_count == 0) {
778 
779 			/*
780 			 * if the page is busy then we have to unlock, wait for
781 			 * it and then restart.
782 			 */
783 			if (pg->flags & PG_BUSY) {
784 				pg->flags |= PG_WANTED;
785 				amap_unlock(amap);
786 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
787 				    "cownow", 0);
788 				goto ReStart;
789 			}
790 
791 			/*
792 			 * ok, time to do a copy-on-write to a new anon
793 			 */
794 			nanon = uvm_analloc();
795 			if (nanon) {
796 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
797 			} else
798 				npg = NULL;	/* XXX: quiet gcc warning */
799 
800 			if (nanon == NULL || npg == NULL) {
801 				/* out of memory */
802 				/*
803 				 * XXXCDC: we should cause fork to fail, but
804 				 * we can't ...
805 				 */
806 				if (nanon) {
807 					simple_lock(&nanon->an_lock);
808 					uvm_anfree(nanon);
809 				}
810 				simple_unlock(&anon->an_lock);
811 				amap_unlock(amap);
812 				uvm_wait("cownowpage");
813 				goto ReStart;
814 			}
815 
816 			/*
817 			 * got it... now we can copy the data and replace anon
818 			 * with our new one...
819 			 */
820 			uvm_pagecopy(pg, npg);		/* old -> new */
821 			anon->an_ref--;			/* can't drop to zero */
822 			amap->am_anon[slot] = nanon;	/* replace */
823 
824 			/*
825 			 * drop PG_BUSY on new page ... since we have had it's
826 			 * owner locked the whole time it can't be
827 			 * PG_RELEASED | PG_WANTED.
828 			 */
829 			npg->flags &= ~(PG_BUSY|PG_FAKE);
830 			UVM_PAGE_OWN(npg, NULL);
831 			uvm_lock_pageq();
832 			uvm_pageactivate(npg);
833 			uvm_unlock_pageq();
834 		}
835 
836 		simple_unlock(&anon->an_lock);
837 		/*
838 		 * done with this anon, next ...!
839 		 */
840 
841 	}	/* end of 'for' loop */
842 
843 	amap_unlock(amap);
844 }
845 
846 /*
847  * amap_splitref: split a single reference into two separate references
848  *
849  * => called from uvm_map's clip routines
850  * => origref's map should be locked
851  * => origref->ar_amap should be unlocked (we will lock)
852  */
853 void
amap_splitref(origref,splitref,offset)854 amap_splitref(origref, splitref, offset)
855 	struct vm_aref *origref, *splitref;
856 	vaddr_t offset;
857 {
858 	int leftslots;
859 
860 	AMAP_B2SLOT(leftslots, offset);
861 	if (leftslots == 0)
862 		panic("amap_splitref: split at zero offset");
863 
864 	/*
865 	 * lock the amap
866 	 */
867 	amap_lock(origref->ar_amap);
868 
869 	/*
870 	 * now: amap is locked and we have a valid am_mapped array.
871 	 */
872 
873 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
874 		panic("amap_splitref: map size check failed");
875 
876 #ifdef UVM_AMAP_PPREF
877         /*
878 	 * establish ppref before we add a duplicate reference to the amap
879 	 */
880 	if (origref->ar_amap->am_ppref == NULL)
881 		amap_pp_establish(origref->ar_amap);
882 #endif
883 
884 	splitref->ar_amap = origref->ar_amap;
885 	splitref->ar_amap->am_ref++;		/* not a share reference */
886 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
887 
888 	amap_unlock(origref->ar_amap);
889 }
890 
891 #ifdef UVM_AMAP_PPREF
892 
893 /*
894  * amap_pp_establish: add a ppref array to an amap, if possible
895  *
896  * => amap locked by caller
897  */
898 void
amap_pp_establish(amap)899 amap_pp_establish(amap)
900 	struct vm_amap *amap;
901 {
902 
903 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
904 	    M_UVMAMAP, M_NOWAIT);
905 
906 	/*
907 	 * if we fail then we just won't use ppref for this amap
908 	 */
909 	if (amap->am_ppref == NULL) {
910 		amap->am_ppref = PPREF_NONE;	/* not using it */
911 		return;
912 	}
913 
914 	/*
915 	 * init ppref
916 	 */
917 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
918 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
919 	return;
920 }
921 
922 /*
923  * amap_pp_adjref: adjust reference count to a part of an amap using the
924  * per-page reference count array.
925  *
926  * => map and amap locked by caller
927  * => caller must check that ppref != PPREF_NONE before calling
928  */
929 void
amap_pp_adjref(amap,curslot,slotlen,adjval)930 amap_pp_adjref(amap, curslot, slotlen, adjval)
931 	struct vm_amap *amap;
932 	int curslot;
933 	vsize_t slotlen;
934 	int adjval;
935 {
936  	int stopslot, *ppref, lcv, prevlcv;
937  	int ref, len, prevref, prevlen;
938 
939 	stopslot = curslot + slotlen;
940 	ppref = amap->am_ppref;
941  	prevlcv = 0;
942 
943 	/*
944  	 * first advance to the correct place in the ppref array,
945  	 * fragment if needed.
946 	 */
947 
948 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
949 		pp_getreflen(ppref, lcv, &ref, &len);
950 		if (lcv + len > curslot) {     /* goes past start? */
951 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
952 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
953 			len = curslot - lcv;   /* new length of entry @ lcv */
954 		}
955 		prevlcv = lcv;
956 	}
957 	if (lcv != 0)
958 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
959 	else {
960 		/* Ensure that the "prevref == ref" test below always
961 		 * fails, since we're starting from the beginning of
962 		 * the ppref array; that is, there is no previous
963 		 * chunk.
964 		 */
965 		prevref = -1;
966 		prevlen = 0;
967 	}
968 
969 	/*
970 	 * now adjust reference counts in range.  merge the first
971 	 * changed entry with the last unchanged entry if possible.
972 	 */
973 
974 	if (lcv != curslot)
975 		panic("amap_pp_adjref: overshot target");
976 
977 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
978 		pp_getreflen(ppref, lcv, &ref, &len);
979 		if (lcv + len > stopslot) {     /* goes past end? */
980 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
981 			pp_setreflen(ppref, stopslot, ref,
982 			    len - (stopslot - lcv));
983 			len = stopslot - lcv;
984 		}
985 		ref += adjval;
986 		if (ref < 0)
987 			panic("amap_pp_adjref: negative reference count");
988 		if (lcv == prevlcv + prevlen && ref == prevref) {
989 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
990 		} else {
991 			pp_setreflen(ppref, lcv, ref, len);
992 		}
993 		if (ref == 0)
994 			amap_wiperange(amap, lcv, len);
995 	}
996 
997 }
998 
999 /*
1000  * amap_wiperange: wipe out a range of an amap
1001  * [different from amap_wipeout because the amap is kept intact]
1002  *
1003  * => both map and amap must be locked by caller.
1004  */
1005 void
amap_wiperange(amap,slotoff,slots)1006 amap_wiperange(amap, slotoff, slots)
1007 	struct vm_amap *amap;
1008 	int slotoff, slots;
1009 {
1010 	int byanon, lcv, stop, curslot, ptr, slotend = 0;
1011 	struct vm_anon *anon;
1012 
1013 	/*
1014 	 * we can either traverse the amap by am_anon or by am_slots depending
1015 	 * on which is cheaper.    decide now.
1016 	 */
1017 
1018 	if (slots < amap->am_nused) {
1019 		byanon = TRUE;
1020 		lcv = slotoff;
1021 		stop = slotoff + slots;
1022 	} else {
1023 		byanon = FALSE;
1024 		lcv = 0;
1025 		stop = amap->am_nused;
1026 		slotend = slotoff + slots;
1027 	}
1028 
1029 	while (lcv < stop) {
1030 		int refs;
1031 
1032   		if (byanon) {
1033 			curslot = lcv++;	/* lcv advances here */
1034 			if (amap->am_anon[curslot] == NULL)
1035 				continue;
1036 		} else {
1037 			curslot = amap->am_slots[lcv];
1038 			if (curslot < slotoff || curslot >= slotend) {
1039 				lcv++;		/* lcv advances here */
1040 				continue;
1041 			}
1042 			stop--;	/* drop stop, since anon will be removed */
1043 		}
1044 		anon = amap->am_anon[curslot];
1045 
1046 		/*
1047 		 * remove it from the amap
1048 		 */
1049 		amap->am_anon[curslot] = NULL;
1050 		ptr = amap->am_bckptr[curslot];
1051 		if (ptr != (amap->am_nused - 1)) {
1052 			amap->am_slots[ptr] =
1053 			    amap->am_slots[amap->am_nused - 1];
1054 			amap->am_bckptr[amap->am_slots[ptr]] =
1055 			    ptr;    /* back ptr. */
1056 		}
1057 		amap->am_nused--;
1058 
1059 		/*
1060 		 * drop anon reference count
1061 		 */
1062 		simple_lock(&anon->an_lock);
1063 		refs = --anon->an_ref;
1064 		simple_unlock(&anon->an_lock);
1065 		if (refs == 0) {
1066 			/*
1067 			 * we just eliminated the last reference to an anon.
1068 			 * free it.
1069 			 */
1070 			uvm_anfree(anon);
1071 		}
1072 	}
1073 }
1074 
1075 #endif
1076