Kernel 'head.S'
After being decompressed, the kernel image starts with another 'startup_32' function, found in '$(linux-2.6.15.3_dir)/arch/i386/kernel/head.S'. This 'head.S' is the second file of that name in the Linux source package and is also called the 'kernel head'. It is exactly what this article describes.
The kernel head goes on to perform higher-level initialization for the first Linux process (process 0). It sets up an execution environment for the kernel main routine, much as an operating system does before an application starts. There are two entry points for CPUs in this 'head.S'; we only follow the execution path of the boot CPU.
/*
 * ! $(linux-2.6.15.3_dir)/arch/i386/kernel/head.S
 */
ENTRY(startup_32)
/*
 * ! We still use linear addresses, since
 * ! %ds = %es = %fs = %gs = __BOOT_DS
 * ! we use the third segment, whose base
 * ! address starts from 0x00000000
 * !
 * ! Every symbol referenced below has __PAGE_OFFSET subtracted
 * ! from it before it is used as an address.
 */
	cld				/* string ops (stosl below) count upwards */
	lgdt boot_gdt_descr - __PAGE_OFFSET
	movl $(__BOOT_DS),%eax
	movl %eax,%ds
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
/*
 * ! Clear the kernel bss
 * !   %eax = 0                        value stored by stosl
 * !   %edi = start of bss             destination pointer
 * !   %ecx = (stop - start) / 4       number of 32-bit words to clear
 */
	xorl %eax,%eax
	movl $__bss_start - __PAGE_OFFSET,%edi
	movl $__bss_stop - __PAGE_OFFSET,%ecx
	subl %edi,%ecx
	shrl $2,%ecx			/* byte count -> dword count */
	rep ; stosl
After copying the boot parameters, it prepares to enable paging. Before paging is enabled, some data structures must be set up first, as described in the Intel manual, Volume 3.
/*
 * ! Initialize the provisional kernel page tables,
 * ! which are stored starting from pg0, right after
 * ! the end of the kernel's uninitialized data segment (bss).
 * ! The provisional page global directory is
 * ! contained in the swapper_pg_dir variable.
 * !
 * ! page_pde_offset = 0x0c00 (i.e. when __PAGE_OFFSET is 0xC0000000)
 * !
 * ! Register roles in the loops below:
 * !   %edi = address of the next page-table entry to write (advanced by stosl)
 * !   %edx = address of the next identity entry in swapper_pg_dir
 * !   %eax = next page-table entry value (page frame address + 0x007)
 * !   %ecx = scratch: PDE value, then inner loop counter
 * !   %ebp = end-condition value
 */
page_pde_offset = (__PAGE_OFFSET >> 20);
/*
 * ! this line indicates the tables start from 'pg0'
 */
	movl $(pg0 - __PAGE_OFFSET), %edi
/*
 * ! this line tells us 'swapper_pg_dir' is the
 * ! page directory start point
 */
	movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
/*
 * ! There are 1024 entries in 'swapper_pg_dir',
 * ! because of the code below:
 * ! ENTRY(swapper_pg_dir)
 * ! .fill 1024,4,0
 * !
 * ! Each pass of the outer loop (label 10) builds one page table of
 * ! 1024 entries (4 KB pages, so 4 MB per table) and stores its PDE
 * ! twice: at the current identity slot and at the slot
 * ! page_pde_offset bytes further on (entry index + 0x300).
 * !
 * ! Assuming __PAGE_OFFSET = 0xC0000000:
 * ! The first mapping:
 * ! both entry 0 and entry 0x300 (page_pde_offset/4) --> pg0
 * ! that is linear 0x00000000~0x003fffff and 0xC0000000~0xC03fffff
 * ! --> physical 0x00000000~0x003fffff
 * ! The second mapping:
 * ! both entry 1 and entry 0x301 (page_pde_offset/4+1) --> pg1 (the page following pg0)
 * ! that is linear 0x00400000~0x007fffff and 0xC0400000~0xC07fffff
 * ! --> physical 0x00400000~0x007fffff
 * !
 * ! The objective of this first phase of paging is to
 * ! allow this low RAM to be addressed both through its
 * ! identity (physical) addresses and through the kernel
 * ! addresses starting at __PAGE_OFFSET.
 */
	movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
10:
	leal 0x007(%edi),%ecx /* Create PDE entry */
	movl %ecx,(%edx) /* Store identity PDE entry */
	movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
	addl $4,%edx
	movl $1024, %ecx
11:
	stosl
	addl $0x1000,%eax
	loop 11b
	/* End condition: we must map up to and including INIT_MAP_BEYOND_END */
	/* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
	leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
	cmpl %ebp,%eax
	jb 10b
	/* ! record where the page tables built above end */
	movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
/*
* ! Only the boot CPU takes this path: it clears %ebx and
* ! jumps forward to local label 3.
* ! NOTE: the excerpt stops here; the matching #endif and the
* ! target label 3: are not shown in this article.
*/
#ifdef CONFIG_SMP
xorl %ebx,%ebx /* This is the boot CPU (BSP) */
jmp 3f
The provisional kernel page tables have now been set up, so paging can be enabled!
/*
 * Enable paging
 */
	movl $swapper_pg_dir-__PAGE_OFFSET,%eax
/*
 * ! load the physical address of the page directory into %cr3
 */
	movl %eax,%cr3 /* set the page table pointer.. */
	movl %cr0,%eax
	orl $0x80000000,%eax
/*
 * ! Enable paging
 */
	movl %eax,%cr0 /* ..and set paging (PG) bit */
/*
 * ! A far jump (it loads both %cs and %eip) right after paging is enabled
 */
	ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
	/* Set up the stack pointer: lss loads %ss and %esp from stack_start */
	lss stack_start,%esp
The code above contains a far jump instruction: 'ljmp $__BOOT_CS,$1f'. You may wonder what '$1f' means. '1' is a local symbol. To define a local symbol, write a label of the form 'N:' (where N represents any digit). To refer to the most recent previous definition of that symbol, write 'Nb', using the same digit as when you defined the label. To refer to the next definition of a local label, write 'Nf'. The 'b' stands for "backwards" and the 'f' stands for "forwards".
Now we are in 32-bit protected mode with paging enabled, so we still need to redo some of the setup that was done earlier in 16-bit mode for 'real-mode' operation.
/*
* ! Set up the interrupt descriptor table (IDT).
* ! All 256 entries are pointed at the default
* ! interrupt "handler", 'ignore_int'.
*/
call setup_idt
….
….
/*
 * setup_idt
 *
 * Writes the same 8-byte gate into each of the 256 entries of idt_table,
 * so that every vector points at ignore_int.
 *
 * Gate layout built here:
 *   low  dword (%eax) = (__KERNEL_CS << 16) | low 16 bits of &ignore_int
 *   high dword (%edx) = high 16 bits of &ignore_int (upper half) | 0x8E00
 *
 * Clobbers: %eax, %ecx, %edx, %edi, flags.
 */
setup_idt:
	/*
	 * ! the idt_table variable is defined
	 * ! in $(linux-2.6.15.3_dir)/arch/i386/kernel/traps.c
	 */
	leal	idt_table,%edi			/* %edi = entry being written */
	movl	$256,%ecx			/* %ecx = entries left to fill */

	leal	ignore_int,%edx			/* %edx = handler address */
	movl	$(__KERNEL_CS << 16),%eax
	movw	%dx,%ax				/* selector = 0x0010 = cs */
	movw	$0x8E00,%dx			/* interrupt gate - dpl=0, present */
rp_sidt:
	movl	%eax,(%edi)			/* low half of the gate */
	movl	%edx,4(%edi)			/* high half of the gate */
	addl	$8,%edi				/* advance to the next 8-byte entry */
	decl	%ecx
	jnz	rp_sidt
	ret
After checking the type of CPU, the kernel head prepares to call the kernel main function 'start_kernel'.
/*
* ! Use the new descriptor tables in a safe place:
* ! load the GDT from cpu_gdt_descr and the IDT from idt_descr,
* ! then far-jump through __KERNEL_CS so that %cs is reloaded too.
* ! The other segment registers are reloaded after the lgdt:
* ! %ss = __KERNEL_DS, %ds = %es = __USER_DS, %fs = %gs = 0,
* ! and the LDT selector is set to 0.
*/
lgdt cpu_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
movl %eax,%ss # after changing gdt.
movl $(__USER_DS),%eax # DS/ES contains default USER segment
movl %eax,%ds
movl %eax,%es
xorl %eax,%eax # Clear FS/GS and LDT
movl %eax,%fs
movl %eax,%gs
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
…
…
/*
* ! The boot CPU now calls
* ! $(linux-2.6.15.3_dir)/init/main.c:start_kernel()
* ! start_kernel() should never return; if it ever does,
* ! execution spins forever in the L6 loop below.
*/
call start_kernel
L6:
jmp L6 # main should never return here, but
# just in case, we know what happens.
评论