xref: /trueos/sys/arm/arm/elf_trampoline.c (revision 5868f7205430cd67aa3b655419d3f15f83b70119)
1 /*-
2  * Copyright (c) 2005 Olivier Houchard.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  */
24 
25 /*
26  * Since we are compiled outside of the normal kernel build process, we
27  * need to include opt_global.h manually.
28  */
29 #include "opt_global.h"
30 #include "opt_kernname.h"
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 #include <machine/asm.h>
35 #include <sys/param.h>
36 #include <sys/elf32.h>
37 #include <sys/inflate.h>
38 #include <machine/elf.h>
39 #include <machine/pte.h>
40 #include <machine/cpufunc.h>
41 #include <machine/armreg.h>
42 
/*
 * Bounds of the kernel image carried by the trampoline.  The symbols are
 * defined outside this file (presumably by the build glue that embeds the
 * kernel -- TODO confirm).
 */
extern char kernel_start[];
extern char kernel_end[];

/*
 * End of the trampoline image.  The memory that follows is used for the
 * stack and, with KZIP, as the destination of the inflated kernel.
 */
extern void *_end;

void _start(void);
void __start(void);
/* Prototype matching the C entry point actually defined in this file. */
void _startC(void);
/* Historical spelling, kept so that any external reference still compiles. */
void __startC(void);

extern unsigned int cpufunc_id(void);
extern void armv6_idcache_wbinv_all(void);
extern void armv7_idcache_wbinv_all(void);
extern void do_call(void *, void *, void *, int);

/*
 * Length of the fixed part of a gzip member header; inflate_kernel() starts
 * looking for the optional header fields this many bytes into the image.
 */
#define GZ_HEAD	0xa
58 
/*
 * Pick the cache maintenance routines for the configured CPU.
 *
 * cpu_idcache_wbinv_all names the routine used to write back and invalidate
 * the instruction/data caches, cpu_l2cache_wbinv_all the one for the L2
 * cache (or expands to nothing when no L2 routine is configured).  Both are
 * invoked by __start() right before it turns the MMU off.
 */
#if defined(CPU_ARM9)
#define cpu_idcache_wbinv_all	arm9_idcache_wbinv_all
extern void arm9_idcache_wbinv_all(void);
#elif defined(CPU_FA526) || defined(CPU_FA626TE)
#define cpu_idcache_wbinv_all	fa526_idcache_wbinv_all
extern void fa526_idcache_wbinv_all(void);
#elif defined(CPU_ARM9E)
#define cpu_idcache_wbinv_all	armv5_ec_idcache_wbinv_all
extern void armv5_ec_idcache_wbinv_all(void);
#elif defined(CPU_ARM10)
#define cpu_idcache_wbinv_all	arm10_idcache_wbinv_all
extern void arm10_idcache_wbinv_all(void);
#elif defined(CPU_ARM1136) || defined(CPU_ARM1176)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
#elif defined(CPU_XSCALE_80200) || defined(CPU_XSCALE_80321) || \
  defined(CPU_XSCALE_PXA2X0) || defined(CPU_XSCALE_IXP425) ||	\
  defined(CPU_XSCALE_80219)
#define cpu_idcache_wbinv_all	xscale_cache_purgeID
extern void xscale_cache_purgeID(void);
#elif defined(CPU_XSCALE_81342)
#define cpu_idcache_wbinv_all	xscalec3_cache_purgeID
extern void xscalec3_cache_purgeID(void);
#elif defined(CPU_MV_PJ4B)
#if !defined(SOC_MV_ARMADAXP)
#define cpu_idcache_wbinv_all	armv6_idcache_wbinv_all
extern void armv6_idcache_wbinv_all(void);
#else
/*
 * Must be an object-like macro: a function-like macro that expands to the
 * bare function name would turn "cpu_idcache_wbinv_all();" into an
 * expression statement that never calls the routine.
 */
#define cpu_idcache_wbinv_all	armadaxp_idcache_wbinv_all
#endif
#endif /* cpu_idcache_wbinv_all selection */
#ifdef CPU_XSCALE_81342
#define cpu_l2cache_wbinv_all	xscalec3_l2cache_purge
extern void xscalec3_l2cache_purge(void);
#elif defined(SOC_MV_KIRKWOOD) || defined(SOC_MV_DISCOVERY)
#define cpu_l2cache_wbinv_all	sheeva_l2cache_wbinv_all
extern void sheeva_l2cache_wbinv_all(void);
#elif defined(CPU_CORTEXA) || defined(CPU_KRAIT)
#define cpu_idcache_wbinv_all	armv7_idcache_wbinv_all
#define cpu_l2cache_wbinv_all()
#else
#define cpu_l2cache_wbinv_all()
#endif
101 
102 static void armadaxp_idcache_wbinv_all(void);
103 
104 int     arm_picache_size;
105 int     arm_picache_line_size;
106 int     arm_picache_ways;
107 
108 int     arm_pdcache_size;       /* and unified */
109 int     arm_pdcache_line_size = 32;
110 int     arm_pdcache_ways;
111 
112 int     arm_pcache_type;
113 int     arm_pcache_unified;
114 
115 int     arm_dcache_align;
116 int     arm_dcache_align_mask;
117 
118 int     arm_dcache_min_line_size = 32;
119 int     arm_icache_min_line_size = 32;
120 int     arm_idcache_min_line_size = 32;
121 
122 u_int	arm_cache_level;
123 u_int	arm_cache_type[14];
124 u_int	arm_cache_loc;
125 
126 /* Additional cache information local to this file.  Log2 of some of the
127       above numbers.  */
128 static int      arm_dcache_l2_nsets;
129 static int      arm_dcache_l2_assoc;
130 static int      arm_dcache_l2_linesize;
131 
132 
133 int block_userspace_access = 0;
134 extern int arm9_dcache_sets_inc;
135 extern int arm9_dcache_sets_max;
136 extern int arm9_dcache_index_max;
137 extern int arm9_dcache_index_inc;
138 
/*
 * Private memcpy() for the trampoline.
 *
 * Copies len bytes from src to dst one byte at a time, front to back, and
 * returns dst.  A zero or negative len copies nothing, which is the same
 * convention bzero() in this file follows.
 */
static __inline void *
memcpy(void *dst, const void *src, int len)
{
	const char *s = src;
	char *d = dst;

	/*
	 * Byte copy only: no alignment requirement is placed on either
	 * pointer.
	 */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return (dst);
}
159 
160 static __inline void
bzero(void * addr,int count)161 bzero(void *addr, int count)
162 {
163 	char *tmp = (char *)addr;
164 
165 	while (count > 0) {
166 		if (count >= 4 && !((vm_offset_t)tmp & 3)) {
167 			*(uint32_t *)tmp = 0;
168 			tmp += 4;
169 			count -= 4;
170 		} else {
171 			*tmp = 0;
172 			tmp++;
173 			count--;
174 		}
175 	}
176 }
177 
178 static void arm9_setup(void);
179 
180 void
_startC(void)181 _startC(void)
182 {
183 	int tmp1;
184 	unsigned int sp = ((unsigned int)&_end & ~3) + 4;
185 	unsigned int pc, kernphysaddr;
186 
187 	/*
188 	 * Figure out the physical address the kernel was loaded at.  This
189 	 * assumes the entry point (this code right here) is in the first page,
190 	 * which will always be the case for this trampoline code.
191 	 */
192 	__asm __volatile("mov %0, pc\n"
193 	    : "=r" (pc));
194 	kernphysaddr = pc & ~PAGE_MASK;
195 
196 #if defined(FLASHADDR) && defined(PHYSADDR) && defined(LOADERRAMADDR)
197 	if ((FLASHADDR > LOADERRAMADDR && pc >= FLASHADDR) ||
198 	    (FLASHADDR < LOADERRAMADDR && pc < LOADERRAMADDR)) {
199 		/*
200 		 * We're running from flash, so just copy the whole thing
201 		 * from flash to memory.
202 		 * This is far from optimal, we could do the relocation or
203 		 * the unzipping directly from flash to memory to avoid this
204 		 * needless copy, but it would require to know the flash
205 		 * physical address.
206 		 */
207 		unsigned int target_addr;
208 		unsigned int tmp_sp;
209 		uint32_t src_addr = (uint32_t)&_start - PHYSADDR + FLASHADDR
210 		    + (pc - FLASHADDR - ((uint32_t)&_startC - PHYSADDR)) & 0xfffff000;
211 
212 		target_addr = (unsigned int)&_start - PHYSADDR + LOADERRAMADDR;
213 		tmp_sp = target_addr + 0x100000 +
214 		    (unsigned int)&_end - (unsigned int)&_start;
215 		memcpy((char *)target_addr, (char *)src_addr,
216 		    (unsigned int)&_end - (unsigned int)&_start);
217 		/* Temporary set the sp and jump to the new location. */
218 		__asm __volatile(
219 		    "mov sp, %1\n"
220 		    "mov pc, %0\n"
221 		    : : "r" (target_addr), "r" (tmp_sp));
222 
223 	}
224 #endif
225 #ifdef KZIP
226 	sp += KERNSIZE + 0x100;
227 	sp &= ~(L1_TABLE_SIZE - 1);
228 	sp += 2 * L1_TABLE_SIZE;
229 #endif
230 	sp += 1024 * 1024; /* Should be enough for a stack */
231 
232 	__asm __volatile("adr %0, 2f\n"
233 	    		 "bic %0, %0, #0xff000000\n"
234 			 "and %1, %1, #0xff000000\n"
235 			 "orr %0, %0, %1\n"
236 			 "mrc p15, 0, %1, c1, c0, 0\n"
237 			 "bic %1, %1, #1\n" /* Disable MMU */
238 			 "orr %1, %1, #(4 | 8)\n" /* Add DC enable,
239 						     WBUF enable */
240 			 "orr %1, %1, #0x1000\n" /* Add IC enable */
241 			 "orr %1, %1, #(0x800)\n" /* BPRD enable */
242 
243 			 "mcr p15, 0, %1, c1, c0, 0\n"
244 			 "nop\n"
245 			 "nop\n"
246 			 "nop\n"
247 			 "mov pc, %0\n"
248 			 "2: nop\n"
249 			 "mov sp, %2\n"
250 			 : "=r" (tmp1), "+r" (kernphysaddr), "+r" (sp));
251 #ifndef KZIP
252 #ifdef CPU_ARM9
253 	/* So that idcache_wbinv works; */
254 	if ((cpufunc_id() & 0x0000f000) == 0x00009000)
255 		arm9_setup();
256 #endif
257 #endif
258 	__start();
259 }
260 
261 static void
get_cachetype_cp15()262 get_cachetype_cp15()
263 {
264 	u_int ctype, isize, dsize, cpuid;
265 	u_int clevel, csize, i, sel;
266 	u_int multiplier;
267 	u_char type;
268 
269 	__asm __volatile("mrc p15, 0, %0, c0, c0, 1"
270 		: "=r" (ctype));
271 
272 	cpuid = cpufunc_id();
273 	/*
274 	 * ...and thus spake the ARM ARM:
275 	 *
276 	 * If an <opcode2> value corresponding to an unimplemented or
277 	 * reserved ID register is encountered, the System Control
278 	 * processor returns the value of the main ID register.
279 	 */
280 	if (ctype == cpuid)
281 		goto out;
282 
283 	if (CPU_CT_FORMAT(ctype) == CPU_CT_ARMV7) {
284 		/* Resolve minimal cache line sizes */
285 		arm_dcache_min_line_size = 1 << (CPU_CT_DMINLINE(ctype) + 2);
286 		arm_icache_min_line_size = 1 << (CPU_CT_IMINLINE(ctype) + 2);
287 		arm_idcache_min_line_size =
288 		    (arm_dcache_min_line_size > arm_icache_min_line_size ?
289 		    arm_icache_min_line_size : arm_dcache_min_line_size);
290 
291 		__asm __volatile("mrc p15, 1, %0, c0, c0, 1"
292 		    : "=r" (clevel));
293 		arm_cache_level = clevel;
294 		arm_cache_loc = CPU_CLIDR_LOC(arm_cache_level) + 1;
295 		i = 0;
296 		while ((type = (clevel & 0x7)) && i < 7) {
297 			if (type == CACHE_DCACHE || type == CACHE_UNI_CACHE ||
298 			    type == CACHE_SEP_CACHE) {
299 				sel = i << 1;
300 				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
301 				    : : "r" (sel));
302 				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
303 				    : "=r" (csize));
304 				arm_cache_type[sel] = csize;
305 			}
306 			if (type == CACHE_ICACHE || type == CACHE_SEP_CACHE) {
307 				sel = (i << 1) | 1;
308 				__asm __volatile("mcr p15, 2, %0, c0, c0, 0"
309 				    : : "r" (sel));
310 				__asm __volatile("mrc p15, 1, %0, c0, c0, 0"
311 				    : "=r" (csize));
312 				arm_cache_type[sel] = csize;
313 			}
314 			i++;
315 			clevel >>= 3;
316 		}
317 	} else {
318 		if ((ctype & CPU_CT_S) == 0)
319 			arm_pcache_unified = 1;
320 
321 		/*
322 		 * If you want to know how this code works, go read the ARM ARM.
323 		 */
324 
325 		arm_pcache_type = CPU_CT_CTYPE(ctype);
326 
327 		if (arm_pcache_unified == 0) {
328 			isize = CPU_CT_ISIZE(ctype);
329 			multiplier = (isize & CPU_CT_xSIZE_M) ? 3 : 2;
330 			arm_picache_line_size = 1U << (CPU_CT_xSIZE_LEN(isize) + 3);
331 			if (CPU_CT_xSIZE_ASSOC(isize) == 0) {
332 				if (isize & CPU_CT_xSIZE_M)
333 					arm_picache_line_size = 0; /* not present */
334 				else
335 					arm_picache_ways = 1;
336 			} else {
337 				arm_picache_ways = multiplier <<
338 				    (CPU_CT_xSIZE_ASSOC(isize) - 1);
339 			}
340 			arm_picache_size = multiplier << (CPU_CT_xSIZE_SIZE(isize) + 8);
341 		}
342 
343 		dsize = CPU_CT_DSIZE(ctype);
344 		multiplier = (dsize & CPU_CT_xSIZE_M) ? 3 : 2;
345 		arm_pdcache_line_size = 1U << (CPU_CT_xSIZE_LEN(dsize) + 3);
346 		if (CPU_CT_xSIZE_ASSOC(dsize) == 0) {
347 			if (dsize & CPU_CT_xSIZE_M)
348 				arm_pdcache_line_size = 0; /* not present */
349 			else
350 				arm_pdcache_ways = 1;
351 		} else {
352 			arm_pdcache_ways = multiplier <<
353 			    (CPU_CT_xSIZE_ASSOC(dsize) - 1);
354 		}
355 		arm_pdcache_size = multiplier << (CPU_CT_xSIZE_SIZE(dsize) + 8);
356 
357 		arm_dcache_align = arm_pdcache_line_size;
358 
359 		arm_dcache_l2_assoc = CPU_CT_xSIZE_ASSOC(dsize) + multiplier - 2;
360 		arm_dcache_l2_linesize = CPU_CT_xSIZE_LEN(dsize) + 3;
361 		arm_dcache_l2_nsets = 6 + CPU_CT_xSIZE_SIZE(dsize) -
362 		    CPU_CT_xSIZE_ASSOC(dsize) - CPU_CT_xSIZE_LEN(dsize);
363 
364 	out:
365 		arm_dcache_align_mask = arm_dcache_align - 1;
366 	}
367 }
368 
369 static void
arm9_setup(void)370 arm9_setup(void)
371 {
372 
373 	get_cachetype_cp15();
374 	arm9_dcache_sets_inc = 1U << arm_dcache_l2_linesize;
375 	arm9_dcache_sets_max = (1U << (arm_dcache_l2_linesize +
376 	    arm_dcache_l2_nsets)) - arm9_dcache_sets_inc;
377 	arm9_dcache_index_inc = 1U << (32 - arm_dcache_l2_assoc);
378 	arm9_dcache_index_max = 0U - arm9_dcache_index_inc;
379 }
380 
381 static void
armadaxp_idcache_wbinv_all(void)382 armadaxp_idcache_wbinv_all(void)
383 {
384 	uint32_t feat;
385 
386 	__asm __volatile("mrc p15, 0, %0, c0, c1, 0" : "=r" (feat));
387 	if (feat & ARM_PFR0_THUMBEE_MASK)
388 		armv7_idcache_wbinv_all();
389 	else
390 		armv6_idcache_wbinv_all();
391 
392 }
393 #ifdef KZIP
394 static  unsigned char *orig_input, *i_input, *i_output;
395 
396 
397 static u_int memcnt;		/* Memory allocated: blocks */
398 static size_t memtot;		/* Memory allocated: bytes */
399 /*
400  * Library functions required by inflate().
401  */
402 
403 #define MEMSIZ 0x8000
404 
405 /*
406  * Allocate memory block.
407  */
408 unsigned char *
kzipmalloc(int size)409 kzipmalloc(int size)
410 {
411 	void *ptr;
412 	static u_char mem[MEMSIZ];
413 
414 	if (memtot + size > MEMSIZ)
415 		return NULL;
416 	ptr = mem + memtot;
417 	memtot += size;
418 	memcnt++;
419 	return ptr;
420 }
421 
422 /*
423  * Free allocated memory block.
424  */
425 void
kzipfree(void * ptr)426 kzipfree(void *ptr)
427 {
428 	memcnt--;
429 	if (!memcnt)
430 		memtot = 0;
431 }
432 
/*
 * Message hook; deliberately empty in the trampoline, the string is
 * discarded.  (Presumably called by inflate() to report errors -- TODO
 * confirm.)
 */
void
putstr(char *dummy)
{
}
437 
438 static int
input(void * dummy)439 input(void *dummy)
440 {
441 	if ((size_t)(i_input - orig_input) >= KERNCOMPSIZE) {
442 		return (GZ_EOF);
443 	}
444 	return *i_input++;
445 }
446 
447 static int
output(void * dummy,unsigned char * ptr,unsigned long len)448 output(void *dummy, unsigned char *ptr, unsigned long len)
449 {
450 
451 
452 	memcpy(i_output, ptr, len);
453 	i_output += len;
454 	return (0);
455 }
456 
457 static void *
inflate_kernel(void * kernel,void * startaddr)458 inflate_kernel(void *kernel, void *startaddr)
459 {
460 	struct inflate infl;
461 	unsigned char slide[GZ_WSIZE];
462 
463 	orig_input = kernel;
464 	memcnt = memtot = 0;
465 	i_input = (unsigned char *)kernel + GZ_HEAD;
466 	if (((char *)kernel)[3] & 0x18) {
467 		while (*i_input)
468 			i_input++;
469 		i_input++;
470 	}
471 	i_output = startaddr;
472 	bzero(&infl, sizeof(infl));
473 	infl.gz_input = input;
474 	infl.gz_output = output;
475 	infl.gz_slide = slide;
476 	inflate(&infl);
477 	return ((char *)(((vm_offset_t)i_output & ~3) + 4));
478 }
479 
480 #endif
481 
482 void *
load_kernel(unsigned int kstart,unsigned int curaddr,unsigned int func_end,int d)483 load_kernel(unsigned int kstart, unsigned int curaddr,unsigned int func_end,
484     int d)
485 {
486 	Elf32_Ehdr *eh;
487 	Elf32_Phdr phdr[64] /* XXX */, *php;
488 	Elf32_Shdr shdr[64] /* XXX */;
489 	int i,j;
490 	void *entry_point;
491 	int symtabindex = -1;
492 	int symstrindex = -1;
493 	vm_offset_t lastaddr = 0;
494 	Elf_Addr ssym = 0;
495 	Elf_Dyn *dp;
496 
497 	eh = (Elf32_Ehdr *)kstart;
498 	ssym = 0;
499 	entry_point = (void*)eh->e_entry;
500 	memcpy(phdr, (void *)(kstart + eh->e_phoff ),
501 	    eh->e_phnum * sizeof(phdr[0]));
502 
503 	/* Determine lastaddr. */
504 	for (i = 0; i < eh->e_phnum; i++) {
505 		if (lastaddr < (phdr[i].p_vaddr - KERNVIRTADDR + curaddr
506 		    + phdr[i].p_memsz))
507 			lastaddr = phdr[i].p_vaddr - KERNVIRTADDR +
508 			    curaddr + phdr[i].p_memsz;
509 	}
510 
511 	/* Save the symbol tables, as there're about to be scratched. */
512 	memcpy(shdr, (void *)(kstart + eh->e_shoff),
513 	    sizeof(*shdr) * eh->e_shnum);
514 	if (eh->e_shnum * eh->e_shentsize != 0 &&
515 	    eh->e_shoff != 0) {
516 		for (i = 0; i < eh->e_shnum; i++) {
517 			if (shdr[i].sh_type == SHT_SYMTAB) {
518 				for (j = 0; j < eh->e_phnum; j++) {
519 					if (phdr[j].p_type == PT_LOAD &&
520 					    shdr[i].sh_offset >=
521 					    phdr[j].p_offset &&
522 					    (shdr[i].sh_offset +
523 					     shdr[i].sh_size <=
524 					     phdr[j].p_offset +
525 					     phdr[j].p_filesz)) {
526 						shdr[i].sh_offset = 0;
527 						shdr[i].sh_size = 0;
528 						j = eh->e_phnum;
529 					}
530 				}
531 				if (shdr[i].sh_offset != 0 &&
532 				    shdr[i].sh_size != 0) {
533 					symtabindex = i;
534 					symstrindex = shdr[i].sh_link;
535 				}
536 			}
537 		}
538 		func_end = roundup(func_end, sizeof(long));
539 		if (symtabindex >= 0 && symstrindex >= 0) {
540 			ssym = lastaddr;
541 			if (d) {
542 				memcpy((void *)func_end, (void *)(
543 				    shdr[symtabindex].sh_offset + kstart),
544 				    shdr[symtabindex].sh_size);
545 				memcpy((void *)(func_end +
546 				    shdr[symtabindex].sh_size),
547 				    (void *)(shdr[symstrindex].sh_offset +
548 				    kstart), shdr[symstrindex].sh_size);
549 			} else {
550 				lastaddr += shdr[symtabindex].sh_size;
551 				lastaddr = roundup(lastaddr,
552 				    sizeof(shdr[symtabindex].sh_size));
553 				lastaddr += sizeof(shdr[symstrindex].sh_size);
554 				lastaddr += shdr[symstrindex].sh_size;
555 				lastaddr = roundup(lastaddr,
556 				    sizeof(shdr[symstrindex].sh_size));
557 			}
558 
559 		}
560 	}
561 	if (!d)
562 		return ((void *)lastaddr);
563 
564 	j = eh->e_phnum;
565 	for (i = 0; i < j; i++) {
566 		volatile char c;
567 
568 		if (phdr[i].p_type != PT_LOAD)
569 			continue;
570 		memcpy((void *)(phdr[i].p_vaddr - KERNVIRTADDR + curaddr),
571 		    (void*)(kstart + phdr[i].p_offset), phdr[i].p_filesz);
572 		/* Clean space from oversized segments, eg: bss. */
573 		if (phdr[i].p_filesz < phdr[i].p_memsz)
574 			bzero((void *)(phdr[i].p_vaddr - KERNVIRTADDR +
575 			    curaddr + phdr[i].p_filesz), phdr[i].p_memsz -
576 			    phdr[i].p_filesz);
577 	}
578 	/* Now grab the symbol tables. */
579 	if (symtabindex >= 0 && symstrindex >= 0) {
580 		*(Elf_Size *)lastaddr =
581 		    shdr[symtabindex].sh_size;
582 		lastaddr += sizeof(shdr[symtabindex].sh_size);
583 		memcpy((void*)lastaddr,
584 		    (void *)func_end,
585 		    shdr[symtabindex].sh_size);
586 		lastaddr += shdr[symtabindex].sh_size;
587 		lastaddr = roundup(lastaddr,
588 		    sizeof(shdr[symtabindex].sh_size));
589 		*(Elf_Size *)lastaddr =
590 		    shdr[symstrindex].sh_size;
591 		lastaddr += sizeof(shdr[symstrindex].sh_size);
592 		memcpy((void*)lastaddr,
593 		    (void*)(func_end +
594 			    shdr[symtabindex].sh_size),
595 		    shdr[symstrindex].sh_size);
596 		lastaddr += shdr[symstrindex].sh_size;
597 		lastaddr = roundup(lastaddr,
598    		    sizeof(shdr[symstrindex].sh_size));
599 		*(Elf_Addr *)curaddr = MAGIC_TRAMP_NUMBER;
600 		*((Elf_Addr *)curaddr + 1) = ssym - curaddr + KERNVIRTADDR;
601 		*((Elf_Addr *)curaddr + 2) = lastaddr - curaddr + KERNVIRTADDR;
602 	} else
603 		*(Elf_Addr *)curaddr = 0;
604 	/* Invalidate the instruction cache. */
605 	__asm __volatile("mcr p15, 0, %0, c7, c5, 0\n"
606 	    		 "mcr p15, 0, %0, c7, c10, 4\n"
607 			 : : "r" (curaddr));
608 	__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
609 	    "bic %0, %0, #1\n" /* MMU_ENABLE */
610 	    "mcr p15, 0, %0, c1, c0, 0\n"
611 	    : "=r" (ssym));
612 	/* Jump to the entry point. */
613 	((void(*)(void))(entry_point - KERNVIRTADDR + curaddr))();
614 	__asm __volatile(".globl func_end\n"
615 	    "func_end:");
616 
617 	/* NOTREACHED */
618 	return NULL;
619 }
620 
621 extern char func_end[];
622 
623 
624 #define PMAP_DOMAIN_KERNEL	0 /*
625 				    * Just define it instead of including the
626 				    * whole VM headers set.
627 				    */
628 int __hack;
629 static __inline void
setup_pagetables(unsigned int pt_addr,vm_paddr_t physstart,vm_paddr_t physend,int write_back)630 setup_pagetables(unsigned int pt_addr, vm_paddr_t physstart, vm_paddr_t physend,
631     int write_back)
632 {
633 	unsigned int *pd = (unsigned int *)pt_addr;
634 	vm_paddr_t addr;
635 	int domain = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT;
636 	int tmp;
637 
638 	bzero(pd, L1_TABLE_SIZE);
639 	for (addr = physstart; addr < physend; addr += L1_S_SIZE) {
640 		pd[addr >> L1_S_SHIFT] = L1_TYPE_S|L1_S_C|L1_S_AP(AP_KRW)|
641 		    L1_S_DOM(PMAP_DOMAIN_KERNEL) | addr;
642 		if (write_back && 0)
643 			pd[addr >> L1_S_SHIFT] |= L1_S_B;
644 	}
645 	/* XXX: See below */
646 	if (0xfff00000 < physstart || 0xfff00000 > physend)
647 		pd[0xfff00000 >> L1_S_SHIFT] = L1_TYPE_S|L1_S_AP(AP_KRW)|
648 		    L1_S_DOM(PMAP_DOMAIN_KERNEL)|physstart;
649 	__asm __volatile("mcr p15, 0, %1, c2, c0, 0\n" /* set TTB */
650 	    		 "mcr p15, 0, %1, c8, c7, 0\n" /* Flush TTB */
651 			 "mcr p15, 0, %2, c3, c0, 0\n" /* Set DAR */
652 			 "mrc p15, 0, %0, c1, c0, 0\n"
653 			 "orr %0, %0, #1\n" /* MMU_ENABLE */
654 			 "mcr p15, 0, %0, c1, c0, 0\n"
655 			 "mrc p15, 0, %0, c2, c0, 0\n" /* CPWAIT */
656 			 "mov r0, r0\n"
657 			 "sub pc, pc, #4\n" :
658 			 "=r" (tmp) : "r" (pd), "r" (domain));
659 
660 	/*
661 	 * XXX: This is the most stupid workaround I've ever wrote.
662 	 * For some reason, the KB9202 won't boot the kernel unless
663 	 * we access an address which is not in the
664 	 * 0x20000000 - 0x20ffffff range. I hope I'll understand
665 	 * what's going on later.
666 	 */
667 	__hack = *(volatile int *)0xfffff21c;
668 }
669 
670 void
__start(void)671 __start(void)
672 {
673 	void *curaddr;
674 	void *dst, *altdst;
675 	char *kernel = (char *)&kernel_start;
676 	int sp;
677 	int pt_addr;
678 
679 	__asm __volatile("mov %0, pc"  :
680 	    "=r" (curaddr));
681 	curaddr = (void*)((unsigned int)curaddr & 0xfff00000);
682 #ifdef KZIP
683 	if (*kernel == 0x1f && kernel[1] == 0x8b) {
684 		pt_addr = (((int)&_end + KERNSIZE + 0x100) &
685 		    ~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
686 
687 #ifdef CPU_ARM9
688 		/* So that idcache_wbinv works; */
689 		if ((cpufunc_id() & 0x0000f000) == 0x00009000)
690 			arm9_setup();
691 #endif
692 		setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
693 		    (vm_paddr_t)curaddr + 0x10000000, 1);
694 		/* Gzipped kernel */
695 		dst = inflate_kernel(kernel, &_end);
696 		kernel = (char *)&_end;
697 		altdst = 4 + load_kernel((unsigned int)kernel,
698 		    (unsigned int)curaddr,
699 		    (unsigned int)&func_end + 800 , 0);
700 		if (altdst > dst)
701 			dst = altdst;
702 
703 		/*
704 		 * Disable MMU.  Otherwise, setup_pagetables call below
705 		 * might overwrite the L1 table we are currently using.
706 		 */
707 		cpu_idcache_wbinv_all();
708 		cpu_l2cache_wbinv_all();
709 		__asm __volatile("mrc p15, 0, %0, c1, c0, 0\n"
710 		  "bic %0, %0, #1\n" /* MMU_DISABLE */
711 		  "mcr p15, 0, %0, c1, c0, 0\n"
712 		  :"=r" (pt_addr));
713 	} else
714 #endif
715 		dst = 4 + load_kernel((unsigned int)&kernel_start,
716 	    (unsigned int)curaddr,
717 	    (unsigned int)&func_end, 0);
718 	dst = (void *)(((vm_offset_t)dst & ~3));
719 	pt_addr = ((unsigned int)dst &~(L1_TABLE_SIZE - 1)) + L1_TABLE_SIZE;
720 	setup_pagetables(pt_addr, (vm_paddr_t)curaddr,
721 	    (vm_paddr_t)curaddr + 0x10000000, 0);
722 	sp = pt_addr + L1_TABLE_SIZE + 8192;
723 	sp = sp &~3;
724 	dst = (void *)(sp + 4);
725 	memcpy((void *)dst, (void *)&load_kernel, (unsigned int)&func_end -
726 	    (unsigned int)&load_kernel + 800);
727 	do_call(dst, kernel, dst + (unsigned int)(&func_end) -
728 	    (unsigned int)(&load_kernel) + 800, sp);
729 }
730 
#ifdef __ARM_EABI__
/* We need to provide these functions but never call them */
void __aeabi_unwind_cpp_pr0(void);
void __aeabi_unwind_cpp_pr1(void);
void __aeabi_unwind_cpp_pr2(void);

/* pr1 and pr2 are aliases of the empty pr0 below. */
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr1);
__strong_reference(__aeabi_unwind_cpp_pr0, __aeabi_unwind_cpp_pr2);
void
__aeabi_unwind_cpp_pr0(void)
{
}
#endif
744 
745