2005-04-17 00:20:36 +02:00
|
|
|
/*
|
|
|
|
* linux/arch/i386/kernel/head.S -- the 32-bit startup code.
|
|
|
|
*
|
|
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
|
|
*
|
|
|
|
* Enhanced CPU detection and feature setting code by Mike Jagdis
|
|
|
|
* and Martin Mares, November 1997.
|
|
|
|
*/
|
|
|
|
|
|
|
|
.text
|
|
|
|
#include <linux/threads.h>
|
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <asm/segment.h>
|
|
|
|
#include <asm/page.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
|
|
#include <asm/desc.h>
|
|
|
|
#include <asm/cache.h>
|
|
|
|
#include <asm/thread_info.h>
|
2005-09-09 19:28:28 +02:00
|
|
|
#include <asm/asm-offsets.h>
|
2005-04-17 00:20:36 +02:00
|
|
|
#include <asm/setup.h>
|
|
|
|
|
|
|
|
/*
 * Shorthand for individual members of the new_cpu_data structure:
 * each macro is the structure's address plus one CPUINFO_* offset.
 */
#define X86		new_cpu_data+CPUINFO_x86
#define X86_VENDOR	new_cpu_data+CPUINFO_x86_vendor
#define X86_MODEL	new_cpu_data+CPUINFO_x86_model
#define X86_MASK	new_cpu_data+CPUINFO_x86_mask
#define X86_HARD_MATH	new_cpu_data+CPUINFO_hard_math
#define X86_CPUID	new_cpu_data+CPUINFO_cpuid_level
#define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
|
|
|
|
|
|
|
|
/*
 * Amount of memory that must be mapped initially *in addition to*
 * everything up to and including _end.  One bit is needed for each
 * possible low-memory page, which in the worst case (4G/4G split) is
 * 2^32/4096/8 = 128K.
 *
 * Modulo rounding, every megabyte assigned here costs a kilobyte of
 * memory that is currently never reclaimed.
 *
 * The value should be a multiple of the page size.
 */
#define INIT_MAP_BEYOND_END	(128*1024)
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * startup_32 - 32-bit kernel entry point, used by the boot CPU only.
 *
 * On entry:
 *   %esi     32-bit pointer to the real-mode code
 *   CS, DS   flat 4 GB segments
 *
 * No particular GDT layout is assumed, because our own boot GDT is
 * loaded as soon as possible.  Symbol addresses are used with
 * __PAGE_OFFSET subtracted here; paging is only switched on further
 * down in this file.
 */
.section .text.head,"ax",@progbits
ENTRY(startup_32)

#ifdef CONFIG_PARAVIRT
	/* Low two bits of %cs non-zero: go to the paravirt entry code. */
	movl	%cs, %eax
	testl	$0x3, %eax
	jnz	startup_paravirt
#endif

	/*
	 * Load the boot GDT and bring the data segment registers to a
	 * known value.
	 */
	cld
	lgdt	boot_gdt_descr - __PAGE_OFFSET
	movl	$(__BOOT_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs

	/*
	 * Zero the BSS before anything else so there are no surprises.
	 * DF is still clear from the cld above.
	 *   %edi = start of BSS, %ecx = BSS length in 32-bit words
	 */
	xorl	%eax, %eax
	movl	$__bss_start - __PAGE_OFFSET, %edi
	movl	$__bss_stop - __PAGE_OFFSET, %ecx
	subl	%edi, %ecx
	shrl	$2, %ecx
	rep ; stosl
|
2005-09-04 00:56:31 +02:00
|
|
|
/*
 * Move the boot parameters to a safe place.
 * %esi still holds the pointer to the real-mode data at this point.
 *
 * When kexec is the boot loader (kexec-on-panic case) the parameter
 * segment may sit beyond the kernel image and may not even be
 * reachable through the early boot page tables, so the copy has to be
 * done before the page tables are initialized.
 */
	movl	$(boot_params - __PAGE_OFFSET), %edi
	movl	$(PARAM_SIZE/4), %ecx
	cld
	rep ; movsl

	/*
	 * Locate the command line: a non-zero NEW_CL_POINTER field selects
	 * the new protocol; otherwise fall back to the old magic/offset
	 * scheme, and skip the copy entirely if the magic does not match.
	 */
	movl	boot_params - __PAGE_OFFSET + NEW_CL_POINTER, %esi
	testl	%esi, %esi
	jnz	2f				/* new command line protocol */
	cmpw	$(OLD_CL_MAGIC), OLD_CL_MAGIC_ADDR
	jne	1f
	movzwl	OLD_CL_OFFSET, %esi
	addl	$(OLD_CL_BASE_ADDR), %esi
2:
	/* %esi = command line source; copy it into boot_command_line. */
	movl	$(boot_command_line - __PAGE_OFFSET), %edi
	movl	$(COMMAND_LINE_SIZE/4), %ecx
	rep ; movsl
1:
|
2005-04-17 00:20:36 +02:00
|
|
|
|
|
|
|
/*
 * Build the initial page tables: a page directory plus a run of page
 * tables placed immediately beyond _end.  init_pg_tables_end is set to
 * the first "safe" location after them.  The same page tables are
 * installed both at virtual address 0 (identity mapping) and at
 * PAGE_OFFSET, covering memory up to
 * _end + sizeof(page tables) + INIT_MAP_BEYOND_END.
 *
 * Warning: neither %esi nor the stack may be used in this code.
 * %esp itself may be used as a general-purpose register if needed.
 *
 * Register roles inside the loops:
 *   %edi  next page-table entry to write (advanced by stosl)
 *   %edx  next page-directory slot
 *   %eax  next PTE value (physical address | 0x007)
 */
page_pde_offset = (__PAGE_OFFSET >> 20);

	movl	$(pg0 - __PAGE_OFFSET), %edi
	movl	$(swapper_pg_dir - __PAGE_OFFSET), %edx
	movl	$0x007, %eax			/* 0x007 = PRESENT+RW+USER */
10:
	/* One page directory entry, stored in both mappings. */
	leal	0x007(%edi), %ecx		/* PDE = this page table | attributes */
	movl	%ecx, (%edx)			/* identity mapping slot */
	movl	%ecx, page_pde_offset(%edx)	/* kernel mapping slot */
	addl	$4, %edx

	/* Fill one page table: 1024 entries, 0x1000 bytes of memory each. */
	movl	$1024, %ecx
11:
	stosl
	addl	$0x1000, %eax
	loop	11b

	/*
	 * Keep going until the mapping reaches INIT_MAP_BEYOND_END bytes
	 * past the end of the page tables themselves.  The 0x007 accounts
	 * for the attribute bits carried in %eax.
	 */
	leal	(INIT_MAP_BEYOND_END+0x007)(%edi), %ebp
	cmpl	%ebp, %eax
	jb	10b
	movl	%edi, (init_pg_tables_end - __PAGE_OFFSET)

	xorl	%ebx, %ebx			/* %ebx = 0: this is the boot CPU (BSP) */
	jmp	3f
|
|
|
|
/*
 * startup_32_smp - entry point for the non-boot CPUs, reached from
 * trampoline.S.
 *
 * lgdt cannot be used here because lgdt itself needs a data segment;
 * the trampoline has already loaded the boot_gdt_table GDT for us.
 *
 * Without CPU hotplug support this code can live in the init section,
 * which is freed later.
 */
#ifdef CONFIG_HOTPLUG_CPU
.section .text,"ax",@progbits
#else
.section .init.text,"ax",@progbits
#endif

#ifdef CONFIG_SMP
ENTRY(startup_32_smp)
	cld
	movl	$(__BOOT_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %fs
	movl	%eax, %gs

/*
 * The new page tables may use 4 MB pages and global pages.
 *
 * NOTE: a 486 may have no %cr4 at all, so %cr4 is only touched when
 * there really are bits to set in it.  This breaks if the BSP
 * implements %cr4 and this AP does not -- very unlikely, but be
 * warned.  The same applies to the PSE feature if it is not equally
 * supported.  --macro
 *
 * NOTE: addresses must be corrected by hand because we are not yet
 * running at PAGE_OFFSET.
 */
#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
	movl	cr4_bits, %edx
	testl	%edx, %edx
	jz	6f				/* nothing to enable */
	movl	%cr4, %eax			/* turn on paging options (PSE, PAE, ...) */
	orl	%edx, %eax
	movl	%eax, %cr4

	btl	$5, %eax			/* is PAE enabled? */
	jnc	6f

	/* Are the extended CPUID functions implemented? */
	movl	$0x80000000, %eax
	cpuid
	cmpl	$0x80000000, %eax
	jbe	6f
	movl	$0x80000001, %eax
	cpuid
	/* Is the Execute Disable bit supported? */
	btl	$20, %edx
	jnc	6f

	/* Set bit 11 in EFER (Extended Feature Enable Register). */
	movl	$0xc0000080, %ecx
	rdmsr
	btsl	$11, %eax
	wrmsr					/* make the change effective */

6:
	/* %ebx = 1: this is a secondary processor (AP). */
	xorl	%ebx, %ebx
	incl	%ebx

#endif /* CONFIG_SMP */
|
2007-02-13 13:26:22 +01:00
|
|
|
3:
/*
 * Common path for boot and secondary CPUs: switch paging on.
 * %ebx is 0 on the boot CPU and 1 on a secondary CPU.
 */
	movl	$swapper_pg_dir-__PAGE_OFFSET, %eax
	movl	%eax, %cr3			/* point %cr3 at the page directory ... */
	movl	%cr0, %eax
	orl	$0x80000000, %eax
	movl	%eax, %cr0			/* ... and set the paging (PG) bit */
	ljmp	$__BOOT_CS, $1f			/* clear prefetch and normalize %eip */
1:
	/* Load %ss:%esp from stack_start. */
	lss	stack_start, %esp

/*
 * Reset eflags.  Some BIOSes leave bits such as NT set, which would
 * confuse a debugger tracing this code.
 * XXX - best to initialize before switching to protected mode.
 */
	pushl	$0
	popfl

#ifdef CONFIG_SMP
	/* Secondary CPUs (%ebx != 0) go straight to CPU detection. */
	testl	%ebx, %ebx
	jnz	checkCPUtype
#endif /* CONFIG_SMP */

/*
 * Start the 32-bit system setup.  Some of the things done in 16-bit
 * mode have to be redone for the "real" operations.
 */
	call	setup_idt
|
|
|
|
|
|
|
|
checkCPUtype:
/*
 * Identify the CPU (386 / 486 / CPUID-capable), record the result in
 * new_cpu_data, program %cr0 accordingly, switch to the final GDT and
 * IDT, and hand control to the C code.
 */
	movl	$-1, X86_CPUID			/* -1: no CPUID level known yet */

/*
 * 386 vs. 486 check.
 * XXX - this does a lot of unnecessary setup.  Alignment checks do not
 * apply at our CPL of 0, the stack ought to be aligned already, and
 * eflags does not need to be preserved.
 */
	movb	$3, X86				/* at least a 386 */
	pushfl
	popl	%eax				/* %eax = EFLAGS */
	movl	%eax, %ecx			/* %ecx = original EFLAGS */
	xorl	$0x240000, %eax			/* flip the AC and ID bits */
	pushl	%eax
	popfl					/* try to write them back */
	pushfl
	popl	%eax				/* read back what actually stuck */
	xorl	%ecx, %eax			/* %eax = bits that changed */
	pushl	%ecx
	popfl					/* restore the original EFLAGS */
	testl	$0x40000, %eax			/* did the AC bit change? */
	je	is386

	movb	$4, X86				/* at least a 486 */
	testl	$0x200000, %eax			/* did the ID bit change? */
	je	is486

	/* CPUID function 0: maximum level and vendor string. */
	xorl	%eax, %eax
	cpuid
	movl	%eax, X86_CPUID			/* CPUID level */
	movl	%ebx, X86_VENDOR_ID		/* first 4 chars */
	movl	%edx, X86_VENDOR_ID+4		/* next 4 chars */
	movl	%ecx, X86_VENDOR_ID+8		/* last 4 chars */

	testl	%eax, %eax			/* is processor info available too? */
	je	is486

	/* CPUID function 1: family, model, stepping and feature flags. */
	movl	$1, %eax
	cpuid
	movb	%al, %cl			/* keep a copy of %al for the stepping */
	andb	$0x0f, %ah			/* processor family */
	movb	%ah, X86
	andb	$0xf0, %al			/* model */
	shrb	$4, %al
	movb	%al, X86_MODEL
	andb	$0x0f, %cl			/* mask revision */
	movb	%cl, X86_MASK
	movl	%edx, X86_CAPABILITY

is486:	movl	$0x50022, %ecx			/* set AM, WP, NE and MP */
	jmp	2f

is386:	movl	$2, %ecx			/* set MP */
2:	movl	%cr0, %eax
	andl	$0x80000011, %eax		/* keep only PG, PE, ET */
	orl	%ecx, %eax
	movl	%eax, %cr0

	call	check_x87
	call	setup_pda

	/* Switch to the final GDT and IDT, then reload every segment register. */
	lgdt	cpu_gdt_descr
	lidt	idt_descr
	ljmp	$(__KERNEL_CS), $1f
1:	movl	$(__KERNEL_DS), %eax
	movl	%eax, %ss

	movl	$(__USER_DS), %eax		/* DS/ES hold the default USER segment */
	movl	%eax, %ds
	movl	%eax, %es

	xorl	%eax, %eax			/* clear GS and the LDT */
	movl	%eax, %gs
	lldt	%ax

	movl	$(__KERNEL_PDA), %eax
	movl	%eax, %fs

	cld					/* gcc2 wants the direction flag cleared at all times */
	pushl	$0				/* fake return address for the unwinder */
#ifdef CONFIG_SMP
	/*
	 * 'ready' is 0 only for the first CPU to get here; that CPU calls
	 * start_kernel, every other CPU calls initialize_secondary.
	 */
	movb	ready, %cl
	movb	$1, ready
	testb	%cl, %cl
	jnz	initialize_secondary
#endif /* CONFIG_SMP */
	jmp	start_kernel
|
2005-04-17 00:20:36 +02:00
|
|
|
|
|
|
|
/*
 * check_x87 - probe for a 287/387 math coprocessor.
 *
 * Depends on the ET bit being correct.  Stores 1 in X86_HARD_MATH when
 * a coprocessor answers, 0 otherwise.  Clobbers %eax.
 */
check_x87:
	movb	$0, X86_HARD_MATH
	clts
	fninit
	fstsw	%ax
	testb	%al, %al			/* low status byte is zero when an FPU responded */
	jz	1f

	/*
	 * No coprocessor: flip CR0 bit 2 (EM).  The xor only sets the bit
	 * because the caller has just rewritten %cr0 with EM clear.
	 */
	movl	%cr0, %eax
	xorl	$4, %eax
	movl	%eax, %cr0
	ret

	ALIGN
1:	movb	$1, X86_HARD_MATH
	.byte	0xDB, 0xE4			/* fsetpm for 287, ignored by 387 */
	ret
|
|
|
|
|
[PATCH] i386: Use %gs as the PDA base-segment in the kernel
This patch is the meat of the PDA change. This patch makes several related
changes:
1: Most significantly, %gs is now used in the kernel. This means that on
entry, the old value of %gs is saved away, and it is reloaded with
__KERNEL_PDA.
2: entry.S constructs the stack in the shape of struct pt_regs, and this
is passed around the kernel so that the process's saved register
state can be accessed.
Unfortunately struct pt_regs doesn't currently have space for %gs
(or %fs). This patch extends pt_regs to add space for gs (no space
is allocated for %fs, since it won't be used, and it would just
complicate the code in entry.S to work around the space).
3: Because %gs is now saved on the stack like %ds, %es and the integer
registers, there are a number of places where it no longer needs to
be handled specially; namely context switch, and saving/restoring the
register state in a signal context.
4: And since kernel threads run in kernel space and call normal kernel
code, they need to be created with their %gs == __KERNEL_PDA.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Chuck Ebbert <76306.1226@compuserve.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
2006-12-07 02:14:02 +01:00
|
|
|
/*
 * setup_pda - point the GDT's PDA descriptor at this CPU's PDA.
 *
 * On boot these are cpu_gdt_table and boot_pda; for secondary CPUs
 * they will be that CPU's own GDT and PDA.
 *
 * Reads start_pda (PDA address) and the address field of cpu_gdt_descr
 * (GDT base), then scatters the 32-bit PDA address over the base
 * fields of the descriptor at offset __KERNEL_PDA.
 * Clobbers %eax and %ecx.
 */
ENTRY(setup_pda)
	movl	start_pda, %eax			/* %eax = PDA pointer */

	/* cpu_gdt_descr+2 skips the 16-bit limit word. */
	movl	cpu_gdt_descr+2, %ecx		/* %ecx = GDT base */
	movw	%ax, (__KERNEL_PDA+0+2)(%ecx)	/* base & 0x0000ffff */
	shrl	$16, %eax
	movb	%al, (__KERNEL_PDA+4+0)(%ecx)	/* base & 0x00ff0000 */
	movb	%ah, (__KERNEL_PDA+4+3)(%ecx)	/* base & 0xff000000 */
	ret
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
/*
 * setup_idt
 *
 * Fills idt_table with 256 interrupt gates that all point at
 * ignore_int, then overrides a few trap vectors with the early fault
 * handlers.  The IDT is not loaded here - that is only possible after
 * paging has been enabled and the kernel has moved to PAGE_OFFSET.
 * Interrupts are enabled elsewhere, once we can be relatively sure
 * everything is ok.
 *
 * Clobbers %eax, %ecx, %edx and %edi.
 * Warning: %esi is live across this function.
 */
setup_idt:
	/*
	 * Build the two gate words:
	 *   %eax = (__KERNEL_CS << 16) | handler[15:0]
	 *   %edx = handler[31:16] << 16 | 0x8E00
	 */
	leal	ignore_int, %edx
	movl	$(__KERNEL_CS << 16), %eax
	movw	%dx, %ax
	movw	$0x8E00, %dx			/* interrupt gate - dpl=0, present */

	leal	idt_table, %edi
	movl	$256, %ecx
rp_sidt:
	movl	%eax, (%edi)
	movl	%edx, 4(%edi)
	addl	$8, %edi
	decl	%ecx
	jne	rp_sidt

/*
 * set_early_handler - install \handler as the interrupt gate for
 * vector \trapno in idt_table.  Clobbers %eax, %edx and %edi.
 */
.macro	set_early_handler handler,trapno
	leal	\handler, %edx
	movl	$(__KERNEL_CS << 16), %eax
	movw	%dx, %ax
	movw	$0x8E00, %dx			/* interrupt gate - dpl=0, present */
	leal	idt_table, %edi
	movl	%eax, 8*\trapno(%edi)
	movl	%edx, 8*\trapno+4(%edi)
.endm

	set_early_handler handler=early_divide_err,trapno=0
	set_early_handler handler=early_illegal_opcode,trapno=6
	set_early_handler handler=early_protection_fault,trapno=13
	set_early_handler handler=early_page_fault,trapno=14

	ret
|
|
|
|
|
2006-09-26 10:52:39 +02:00
|
|
|
/*
 * Early exception stubs installed by setup_idt.  Each one puts its
 * trap number in %edx and joins early_fault.  The divide-error and
 * illegal-opcode stubs first push a fake error code so that all four
 * reach early_fault with the same stack layout.
 */
early_divide_err:
	xorl	%edx, %edx			/* trap 0 */
	pushl	$0				/* fake errcode */
	jmp	early_fault

early_illegal_opcode:
	movl	$6, %edx
	pushl	$0				/* fake errcode */
	jmp	early_fault

early_protection_fault:
	movl	$13, %edx
	jmp	early_fault

early_page_fault:
	movl	$14, %edx
	jmp	early_fault

/*
 * early_fault - report the fault (when printk is configured) and halt.
 * Never returns.  %edx holds the trap number on entry.
 */
early_fault:
	cld
#ifdef CONFIG_PRINTK
	movl	$(__KERNEL_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* Stop reporting once the handlers have been entered twice. */
	cmpl	$2, early_recursion_flag
	je	hlt_loop
	incl	early_recursion_flag
	/*
	 * Arguments for fault_msg: trap number and %cr2 are pushed here;
	 * the remaining conversions consume the words already on the stack.
	 */
	movl	%cr2, %eax
	pushl	%eax
	pushl	%edx				/* trapno */
	pushl	$fault_msg
#ifdef CONFIG_EARLY_PRINTK
	call	early_printk
#else
	call	printk
#endif
#endif
hlt_loop:
	hlt
	jmp	hlt_loop
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
/*
 * ignore_int - the default interrupt "handler" :-)
 *
 * Installed in every IDT slot by setup_idt.  With printk configured it
 * prints int_msg and returns; otherwise it returns immediately.
 */
	ALIGN
ignore_int:
	cld
#ifdef CONFIG_PRINTK
	pushl	%eax
	pushl	%ecx
	pushl	%edx
	pushl	%es
	pushl	%ds
	movl	$(__KERNEL_DS), %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* Halt instead of reporting once the handlers have been entered twice. */
	cmpl	$2, early_recursion_flag
	je	hlt_loop
	incl	early_recursion_flag
	/*
	 * Push the printk arguments.  Every push moves %esp down by 4, so
	 * these four offsets pick up, in order: the saved %eax, then the
	 * interrupted EIP, CS and EFLAGS from the exception frame.
	 */
	pushl	16(%esp)
	pushl	24(%esp)
	pushl	32(%esp)
	pushl	40(%esp)
	pushl	$int_msg
#ifdef CONFIG_EARLY_PRINTK
	call	early_printk
#else
	call	printk
#endif
	addl	$(5*4), %esp			/* drop the five argument words */
	popl	%ds
	popl	%es
	popl	%edx
	popl	%ecx
	popl	%eax
#endif
	iret
|
|
|
|
|
2007-02-13 13:26:22 +01:00
|
|
|
.section .text
#ifdef CONFIG_PARAVIRT
/*
 * startup_paravirt - entered from startup_32 when the low two bits of
 * %cs are non-zero.  Calls each function pointer stored between
 * __start_paravirtprobe and __stop_paravirtprobe in turn, restoring
 * the entry values of %eax, %ecx and %edx before every call.  Running
 * off the end of the table is fatal.
 */
startup_paravirt:
	cld
	movl	$(init_thread_union+THREAD_SIZE), %esp

	/* We take pains to preserve all the regs. */
	pushl	%edx
	pushl	%ecx
	pushl	%eax

	/* Top of stack is the cursor into the probe table. */
	pushl	$__start_paravirtprobe
1:
	movl	(%esp), %eax
	cmpl	$__stop_paravirtprobe, %eax
	je	unhandled_paravirt
	pushl	(%eax)				/* probe function pointer */
	movl	8(%esp), %eax			/* entry value of %eax */
	call	*(%esp)
	popl	%eax				/* discard the function pointer */

	/* The probe returned: reload the saved registers. */
	movl	4(%esp), %eax
	movl	8(%esp), %ecx
	movl	12(%esp), %edx

	addl	$4, (%esp)			/* advance to the next table entry */
	jmp	1b

unhandled_paravirt:
	/* Nothing wanted us: we're screwed. */
	ud2
#endif
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
/*
 * The normal "text" segment really begins here.
 */
ENTRY(stext)
ENTRY(_stext)
|
|
|
|
|
|
|
|
/*
 * Page-aligned BSS: the initial page directory (1024 four-byte
 * entries) followed by a 4096-byte zero page.
 */
.section ".bss.page_aligned","w"
ENTRY(swapper_pg_dir)
	.fill	1024,4,0
ENTRY(empty_zero_page)
	.fill	4096,1,0
|
|
|
|
|
|
|
|
/*
 * Start of the data section.
 */
.data

/* PDA address read by setup_pda. */
ENTRY(start_pda)
	.long	boot_pda

/* %esp / %ss pair loaded with lss once paging is on. */
ENTRY(stack_start)
	.long	init_thread_union+THREAD_SIZE
	.long	__BOOT_DS

/* 0 until the first CPU has passed through checkCPUtype. */
ready:	.byte	0

/* Entry counter shared by early_fault and ignore_int. */
early_recursion_flag:
	.long	0

int_msg:
	.asciz	"Unknown interrupt or fault at EIP %p %p %p\n"

fault_msg:
	.ascii	"Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
	.asciz	"Stack: %p %p %p %p %p %p %p %p\n"
|
|
|
|
|
2005-04-17 00:20:36 +02:00
|
|
|
/*
 * The IDT and GDT 'descriptors' are odd 48-bit objects used only by
 * the lidt and lgdt instructions.  Unlike ordinary segment descriptors
 * they consist of a 16-bit size followed by a 32-bit linear address.
 * Each one is preceded by a padding word so that the address field is
 * 32-bit aligned.
 */

.globl boot_gdt_descr
.globl idt_descr

	ALIGN
	/* Early boot GDT descriptor (must use 1:1 address mapping). */
	.word	0				/* pad: 32-bit align the address */
boot_gdt_descr:
	.word	__BOOT_DS+7
	.long	boot_gdt_table - __PAGE_OFFSET

	.word	0				/* pad: 32-bit align the address */
idt_descr:
	.word	IDT_ENTRIES*8-1			/* idt contains 256 entries */
	.long	idt_table

	/* Boot GDT descriptor (later on used by CPU#0). */
	.word	0				/* pad: 32-bit align the address */
ENTRY(cpu_gdt_descr)
	.word	GDT_ENTRIES*8-1
	.long	cpu_gdt_table
|
|
|
|
|
|
|
|
/*
 * boot_gdt_table is used only while booting and must mirror the
 * equivalent table in setup.S.
 */
	.align	L1_CACHE_BYTES
ENTRY(boot_gdt_table)
	.fill	GDT_ENTRY_BOOT_CS,8,0
	.quad	0x00cf9a000000ffff		/* kernel 4GB code at 0x00000000 */
	.quad	0x00cf92000000ffff		/* kernel 4GB data at 0x00000000 */
|
|
|
|
|
|
|
|
/*
 * The per-CPU Global Descriptor Table.  The table below holds 32
 * quadword descriptors.
 */
	.align	L1_CACHE_BYTES
ENTRY(cpu_gdt_table)
	.quad	0x0000000000000000		/* NULL descriptor */
	.quad	0x0000000000000000		/* 0x0b reserved */
	.quad	0x0000000000000000		/* 0x13 reserved */
	.quad	0x0000000000000000		/* 0x1b reserved */
	.quad	0x0000000000000000		/* 0x20 unused */
	.quad	0x0000000000000000		/* 0x28 unused */
	.quad	0x0000000000000000		/* 0x33 TLS entry 1 */
	.quad	0x0000000000000000		/* 0x3b TLS entry 2 */
	.quad	0x0000000000000000		/* 0x43 TLS entry 3 */
	.quad	0x0000000000000000		/* 0x4b reserved */
	.quad	0x0000000000000000		/* 0x53 reserved */
	.quad	0x0000000000000000		/* 0x5b reserved */

	.quad	0x00cf9a000000ffff		/* 0x60 kernel 4GB code at 0x00000000 */
	.quad	0x00cf92000000ffff		/* 0x68 kernel 4GB data at 0x00000000 */
	.quad	0x00cffa000000ffff		/* 0x73 user 4GB code at 0x00000000 */
	.quad	0x00cff2000000ffff		/* 0x7b user 4GB data at 0x00000000 */

	.quad	0x0000000000000000		/* 0x80 TSS descriptor */
	.quad	0x0000000000000000		/* 0x88 LDT descriptor */

	/*
	 * Segments used for calling PnP BIOS have byte granularity.
	 * The code segments and data segments have fixed 64k limits,
	 * the transfer segment sizes are set at run time.
	 */
	.quad	0x00409a000000ffff		/* 0x90 32-bit code */
	.quad	0x00009a000000ffff		/* 0x98 16-bit code */
	.quad	0x000092000000ffff		/* 0xa0 16-bit data */
	.quad	0x0000920000000000		/* 0xa8 16-bit data */
	.quad	0x0000920000000000		/* 0xb0 16-bit data */

	/*
	 * The APM segments have byte granularity and their bases
	 * are set at run time.  All have 64k limits.
	 */
	.quad	0x00409a000000ffff		/* 0xb8 APM CS    code */
	.quad	0x00009a000000ffff		/* 0xc0 APM CS 16 code (16 bit) */
	.quad	0x004092000000ffff		/* 0xc8 APM DS    data */

	.quad	0x00c0920000000000		/* 0xd0 - ESPFIX SS */
	.quad	0x00cf92000000ffff		/* 0xd8 - PDA */
	.quad	0x0000000000000000		/* 0xe0 - unused */
	.quad	0x0000000000000000		/* 0xe8 - unused */
	.quad	0x0000000000000000		/* 0xf0 - unused */
	.quad	0x0000000000000000		/* 0xf8 - GDT entry 31: double-fault TSS */
|
|
|
|
|