fb20f65a01
Quick fix for lack of memset(__bss_start, 0, _end-__bss_start) in load_kernel(). If edata is unaligned, the loop will overwrite all memory because r3 and r4 will never be equal. Signed-off-by: Olaf Hering <olaf@aepfle.de> Signed-off-by: Paul Mackerras <paulus@samba.org>
213 lines
4.6 KiB
ArmAsm
213 lines
4.6 KiB
ArmAsm
/*
|
|
* This is the common part of the loader relocation and initialization
|
|
* process. All of the board/processor specific initialization is
|
|
* done before we get here.
|
|
*
|
|
* Author: Tom Rini
|
|
* trini@mvista.com
|
|
* Derived from arch/ppc/boot/prep/head.S (Cort Dougan, many others).
|
|
*
|
|
* 2001-2004 (c) MontaVista, Software, Inc. This file is licensed under
|
|
* the terms of the GNU General Public License version 2. This program
|
|
* is licensed "as is" without any warranty of any kind, whether express
|
|
* or implied.
|
|
*/
|
|
|
|
#include <asm/cache.h>
|
|
#include <asm/ppc_asm.h>
|
|
|
|
/*
 * GETSYM(reg, sym): load the 32-bit address of 'sym' into 'reg' using
 * the high half (sym@h) followed by an OR of the low half (sym@l).
 */
#define GETSYM(reg, sym)		\
	lis	reg, sym@h;		\
	ori	reg, reg, sym@l
|
|
|
|
.text
|
|
/*
 * relocate -- entered from the early initialization code.
 *
 * In:	r3 = address where we were loaded
 *	r4 = any residual data passed from the boot rom
 *
 * Registers set up here and relied upon by the code that follows:
 *	r7  = size of the whole image, in 32-bit words (rounded up)
 *	r8  = load address (copy of the incoming r3)
 *	r11 = residual data (copy of the incoming r4)
 *
 * If the image already sits at its link address we branch straight to
 * start_ldr.  Otherwise the [__relocate_start, __relocate_end) code is
 * copied to a staging area beyond both the loaded image and the link
 * time image, and we continue in that copy of do_relocate by way of
 * flush_instruction_cache.
 */
	.globl	relocate
relocate:
	/* Stash the incoming arguments; r3/r4 are reused as scratch. */
	mr	r8, r3
	mr	r11, r4

	/* r7 = r5 = (end + 3 - start) / 4: image size in words, rounded up. */
	GETSYM(r4, start)
	GETSYM(r5, end)
	addi	r5,r5,3
	sub	r5,r5,r4
	srwi	r5,r5,2
	mr	r7,r5			/* kept for do_relocate / start_ldr */

	/* Already running at the link address?  Then nothing to move. */
	cmpw	cr0,r8,r4		/* r8 == incoming r3, r4 == start */
	beq	start_ldr

	/*
	 * Choose a safe place for the relocation code:
	 *	r6 = max(load + size, end)	(signed compare)
	 * r4 still holds 'start' from above; r5 must be reloaded because it
	 * was turned into the word count.
	 */
	GETSYM(r5, end)
	sub	r6,r5,r4		/* r6 = end - start */
	add	r6,r8,r6		/* r6 = load + size */
	cmpw	r5,r6
	ble	1f			/* load + size is already the larger one */
	mr	r6, r5			/* otherwise use end */
1:
	/*
	 * Precautionary alignment of the destination: step forward by four
	 * bytes and clear the two low-order bits.
	 */
	addi	r6,r6,4
	clrrwi	r6,r6,2

	/* ctr = number of words in [__relocate_start, __relocate_end). */
	GETSYM(r5, __relocate_start)
	GETSYM(r4, __relocate_end)
	sub	r4,r4,r5
	srwi	r4,r4,2
	mtctr	r4

	/* r5 = source = __relocate_start - start + where_loaded */
	GETSYM(r3, start)
	sub	r3,r5,r3
	add	r5,r8,r3

	/* r3 = moving destination pointer; r6 keeps the base of the copy. */
	mr	r3, r6

	/* Word-by-word copy of the relocation code. */
2:	lwz	r4,0(r5)
	stw	r4,0(r3)
	addi	r3,r3,4
	addi	r5,r5,4
	bdnz	2b

	/* r6 = address of do_relocate inside the freshly copied section. */
	GETSYM(r4, __relocate_start)
	GETSYM(r5, do_relocate)
	sub	r4,r5,r4		/* offset of do_relocate in the section */
	add	r6,r6,r4

	/* flush_instruction_cache will return to the relocated do_relocate. */
	mtlr	r6
	b	flush_instruction_cache
|
|
|
|
.section ".relocate_code","xa"
|
|
|
|
do_relocate:
|
|
/* We have 2 cases --- start < load, or start > load
|
|
* This determines whether we copy from the end, or the start.
|
|
* Its easier to have 2 loops than to have paramaterised
|
|
* loops. Sigh.
|
|
*/
|
|
li r6,0 /* Clear checksum */
|
|
mtctr r7 /* Setup for a loop */
|
|
|
|
GETSYM(r4, start)
|
|
mr r3,r8 /* Get the load addr */
|
|
|
|
cmpw cr0,r4,r3 /* If we need to copy from the end, do so */
|
|
bgt do_relocate_from_end
|
|
|
|
do_relocate_from_start:
|
|
1: lwz r5,0(r3) /* Load and decrement */
|
|
stw r5,0(r4) /* Store and decrement */
|
|
addi r3,r3,4
|
|
addi r4,r4,4
|
|
xor r6,r6,r5 /* Update checksum */
|
|
bdnz 1b /* Are we done? */
|
|
b do_relocate_out /* Finished */
|
|
|
|
do_relocate_from_end:
|
|
GETSYM(r3, end)
|
|
slwi r4,r7,2
|
|
add r4,r8,r4 /* Get the physical end */
|
|
1: lwzu r5,-4(r4)
|
|
stwu r5, -4(r3)
|
|
xor r6,r6,r5
|
|
bdnz 1b
|
|
|
|
do_relocate_out:
|
|
GETSYM(r3,start_ldr)
|
|
mtlr r3 /* Easiest way to do an absolute jump */
|
|
/* Some boards don't boot up with the I-cache enabled. Do that
|
|
* now because the decompress runs much faster that way.
|
|
* As a side effect, we have to ensure the data cache is not enabled
|
|
* so we can access the serial I/O without trouble.
|
|
*/
|
|
b flush_instruction_cache
|
|
|
|
.previous
|
|
|
|
/* Size of the loader stack; shared by the .comm below and the SP setup. */
#define BOOT_STACK_SIZE	(4096*2)

/*
 * start_ldr -- runs at the link address.  Reached directly from relocate
 * when no move was needed, or through LR once do_relocate has finished.
 *
 * In:	r7  = image size in words
 *	r8  = load address
 *	r11 = residual data
 *	r25 = validated OFW interface (set up outside this file)
 *	r6  = checksum written by do_relocate; NOTE(review): nothing in
 *	      this file sets r6 when relocate branches here directly.
 *
 * Clears BSS, switches to the loader stack, calls load_kernel() and then
 * jumps to the kernel entry point.
 */
start_ldr:
	/* Clear all of BSS and set up stack for C calls */
	GETSYM(r3, __bss_start)
	GETSYM(r4, end)
	subi	r3,r3,4		/* Pre-bias both for the stwu update form */
	subi	r4,r4,4
	li	r0,0
	b	51f		/* Test first so an empty BSS writes nothing */
50:	stwu	r0,4(r3)	/* r3 += 4, then zero the word at r3 */
51:	cmplw	cr0,r3,r4	/* Addresses are unsigned; comparing with */
	blt	50b		/* "less than" also stops if end is unaligned */
90:	mr	r9,r1		/* Save old stack pointer (in case it matters) */
	GETSYM(r1, .stack)
	addi	r1,r1,BOOT_STACK_SIZE	/* Top of the stack area */
	subi	r1,r1,256	/* Leave 256 bytes free above the initial SP */
	li	r2,0x000F	/* Mask pointer to 16-byte boundary */
	andc	r1,r1,r2

	/*
	 * Exec kernel loader
	 */
	mr	r3,r8		/* Load point */
	mr	r4,r7		/* Program length */
	mr	r5,r6		/* Checksum */
	mr	r6,r11		/* Residual data */
	mr	r7,r25		/* Validated OFW interface */
	bl	load_kernel

	/*
	 * Make sure the kernel knows we don't have things set in
	 * registers.  -- Tom
	 */
	li	r4,0
	li	r5,0
	li	r6,0

	/*
	 * Start at the beginning.
	 */
#ifdef CONFIG_PPC_PREP
	li	r9,0xc
	mtlr	r9
	/* tell kernel we're prep, by putting 0xdeadc0de at KERNELLOAD,
	 * and tell the kernel to start on the 4th instruction since we
	 * overwrite the first 3 sometimes (which are 'nop').
	 */
	lis	r10,0xdeadc0de@h
	ori	r10,r10,0xdeadc0de@l
	li	r9,0
	stw	r10,0(r9)	/* The marker is stored at address 0 */
#else
	li	r9,0
	mtlr	r9
#endif
	blr			/* Jump to the address placed in LR above */

	.comm	.stack,BOOT_STACK_SIZE,4
|