1、Linux内核构成 (国嵌) Linux/arch/arm/boot/compressed/head.s 1.解压缩 2.初始化 3.启动应用程序 1 arch/arm/boot/compressed/Makefile arch/arm/boot/compressed/vmlinux.lds 2. arch/arm/kernel/vmlinux.lds Linux内核启动流程 (国嵌) arch/arm/boot/compressed/start.S(head.s—负责解压缩) Start: .type sta
2、rt,#function .rept 8 mov r0,r0 .endr b 1f .word 0x016f2818 @ Magic numbers to help the loader .word start @ absolute load/run zImage address
3、 .word _edata @ zImage end address 1: mov r7,r1 @ save architecture ID mov r8,r2 @ save atags pointer 这也标志着u-boot将系统完全交给了OS,bootloader生命终结。之后代码在133行会读取cpsr并判断处理器是否处在supervisor模式——从u-boot进入kernel,系统已经处
4、在SVC32模式;而利用angel进入则处在user模式,还需要额外两条指令。之后是再次确认中断关闭,并完成cpsr写入 mrs r2,cpsr @ get current mode tst r2,#3 @ not user? bne not_angel mov r0,#0x17 @ angel_SWIreason_EnterSVC
5、 swi 0x123456 @ angel_SWI_ARM not_angel: mrs r2,cpsr @ turn off interrupts to orr r2,r2,#0xc0 @ prevent angel from running msr cpsr_c,r2 然后在LC0地址处将分段信息导入r0-r6、ip、sp等寄存器,并检查代码
6、是否运行在与链接时相同的目的地址,以决定是否进行处理。由于现在很少有人不使用loader和tags,将zImage烧写到rom直接从0x0位置执行,所以这个处理是必须的(但是zImage头目前也保留了不用loader也可启动的能力)。arm架构下自解压头一般是链接在0x0地址而被加载到0x30008000运行,所以要修正这个变化。涉及到 r5寄存器存储zImage基地址 r6和r12(即ip寄存器)存储got(global offset table) r2和r3存储bss段起止地址 sp栈指针地址 很简单,这些寄存器统统被加上一个你也能猜到的偏移地址 0x30008000。该地址
7、是s3c2410相关的,其他ARM处理器可以参考下表 PXA2xx是0xa0008000 IXP2x00和IXP4xx是0x00008000 Freescale i.MX31/37是0x80008000 TI davinci DM64xx是0x80008000 TI omap系列是0x80008000 AT91RM/SAM92xx系列是0x8000 Cirrus EP93xx是0x00008000 这些操作发生在代码172行开始的地方,下面只粘贴一部分 add r5,r5,r0 add
8、 r6,r6,r0 add ip,ip,r0 背面在211行进行bss段清零工作 not_relocated: mov r0,#0 1: str r0,[r2],#4 @ clear bss str r0,[r2],#4 str r0,[r2],#4 str r0,[r2],#4 cmp r2,r3
9、 blo 1b 然后224行,打开cache,并为后面解压缩设置64KB临时malloc空间 bl cache_on mov r1,sp @ malloc space above stack add r2,sp,#0x10000 @ 64k max 接下来238行进行检查,确定内核解压缩后Image目的地址是否会覆盖到zImage头,如果是则准备将zImage头转移到解压
10、出来内核背面 cmp r4,r2 bhs wont_overwrite sub r3,sp,r5 @ > compressed kernel size add r0,r4,r3,lsl #2 @ allow for 4x expansion cmp r0,r5 bls wont_overwrite
11、 mov r5,r2 @ decompress after malloc space mov r0,r5 mov r3,r7 bl decompress_kernel 真实情况——在大多数应用中,内核编译都会把压缩的zImage和非压缩的Image链接到同样的地址,s3c2410平台下即是0x30008000。这样做的好处是,人们不用关心内核是Image还是zImage,放到这个位置执行就OK,所以在解压
12、缩后zImage头必须为真正的内核让路。 在250行解压完毕,内核长度返回值存储在r0寄存器里。在内核末尾空出128字节的栈空间用,并且使其长度128字节对齐。 add r0,r0,#127 + 128 @ alignment + stack bic r0,r0,#127 @ align the kernel length 算出搬移代码的参数:计算内核末尾地址并存储于r1寄存器,需要搬移的代码原来的地址放在r2,需要搬移的长度放在r3。然后执行搬移,并设置好sp指针指向新的栈(原来
13、栈也会被内核覆盖掉) add r1,r5,r0 @ end of decompressed kernel adr r2,reloc_start ldr r3,LC1 add r3,r2,r3 1: ldmia r2!,{r9 - r14} @ copy relocation code stmia r1!,{r9 -
14、r14} ldmia r2!,{r9 - r14} stmia r1!,{r9 - r14} cmp r2,r3 blo 1b add sp,r1,#128 @ relocate the stack 搬移完毕后刷新cache,由于代码地址变化了不能让cache再命中被内核覆盖老地址。然后跳转到新地址继续执行 bl cache
15、clean_flush add pc,r5,r0 @ call relocation code 注意——zImage在解压后的搬移和跳转会给gdb调试内核带来麻烦。因为用来调试的符号表是在编译时生成的,并不知道以后会被搬移到何处去,只有在内核解压缩完成之后,根据计算出来的参数“告诉”调试器这个变化。以撰写本文时使用的zImage为例,内核自解压头重定向后,reloc_start地址由0x30008360变为0x30533e60。故我们要把vmlinux的符号表也相应地从0x30008000后移到0x30533b00开始,这样gdb就可以
16、正确地对应源代码和机器指令。 随着头部代码移动到新位置,不会再和内核的目的地址冲突,可以开始内核自身的搬移了。此时r0寄存器存储的是内核长度(严格说是长度外加128Byte的栈),r4存储的是内核目的地址0x30008000,r5是当前内核的存储地址,r6是CPU ID,r7是machine ID,r8是atags地址。代码从501行开始 reloc_start: add r9,r5,r0 sub r9,r9,#128 @ do not copy the stack debug_reloc
17、start mov r1,r4 1: .rept 4 ldmia r5!,{r0,r2,r3,r10 - r14} @ relocate kernel stmia r1!,{r0,r2,r3,r10 - r14} .endr cmp r5,r9 blo 1b add s
18、p,r1,#128 @ relocate the stack 接下来在516行清除并关闭cache,清零r0,将machine ID存入r1,atags指针存入r2,再跳入0x30008000执行真正内核Image call_kernel: bl cache_clean_flush bl cache_off mov r0,#0 @ must be zero mov r1,r7
19、 @ restore architecture number mov r2,r8 @ restore atags pointer mov pc,r4 @ call kernel 内核代码入口在arch/arm/kernel/head.S文献83行。一方面进入SVC32模式,并查询CPU ID,检查合法性 msr cpsr_c,#PSR_F_BIT | PSR_I_BIT |
20、SVC_MODE @ ensure svc mode @ and irqs disabled mrc p15,0,r9,c0,c0 @ get processor id bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10,r5 @ invalid processor
21、r5=0)? beq __error_p @ yes,error 'p' 接着在87行进一步查询machine ID并检查合法性 bl __lookup_machine_type @ r5=machinfo movs r8,r5 @ invalid machine (r5=0)? beq __error_a @ yes,erro
22、r 'a' 其中__lookup_processor_type在linux-2.6.24-moko-linuxbj/arch/arm/kernel/head-common.S文件149行,该函数首先将标号3的实际地址加载到r3,然后将编译时生成的__proc_info_begin虚拟地址载入到r5,__proc_info_end虚拟地址载入到r6,标号3的虚拟地址载入到r7。由于adr伪指令和标号3的使用,以及__proc_info_begin等符号在linux-2.6.24-moko-linuxbj/arch/arm/kernel/vmlinux.lds而不是代码中被定义,此处代码不是非常直观,想弄
23、清楚代码缘由的读者请耐心阅读这两个文件和adr伪指令的说明。 r3和r7分别存储的是同一位置标号3的物理地址(由于没有启用mmu,所以当前肯定是物理地址)和虚拟地址,所以二者相减即得到虚拟地址和物理地址之间的offset。利用此offset,将r5和r6中保存的虚拟地址转变为物理地址 __lookup_processor_type: adr r3,3f ldmda r3,{r5 - r7} sub r3,r3,r7 @ get offset between virt&phys add r5,r5,r3
24、 @ convert virt addresses to add r6,r6,r3 @ physical address space 然后从proc_info中读出内核编译时写入的processor ID,和之前通过mrc指令从cp15的c0寄存器中读到的processor ID(保存在r9)对比,查看代码和CPU硬件是否匹配(想在arm920t上运行为cortex-a8编译的内核?不让!)。如果编译了多种处理器支持,如versatile板,则会循环每种type依次检查,如果硬件读出的ID在内核中找不到匹配,则r5置0返回 1: ldmia r5,{r3,r4} @ value,
25、mask and r4,r4,r9 @ mask wanted bits teq r3,r4 beq 2f add r5,r5,#PROC_INFO_SZ @ sizeof(proc_info_list) cmp r5,r6 blo 1b mov r5,#0 @ unknown processor 2: mov pc,lr __lookup_machine_type在linux-2.6.24-moko-linuxbj/arch/arm/kernel/head-common.S文件197行,编码方法与检查processor ID完全一样,请参考前段
26、 __lookup_machine_type: adr r3,3b ldmia r3,{r4,r5,r6} sub r3,r3,r4 @ get offset between virt&phys add r5,r5,r3 @ convert virt addresses to add r6,r6,r3 @ physical address space 1: ldr r3,[r5,#MACHINFO_TYPE] @ get machine type teq r3,r1 @ matches loader number? beq 2f @ f
27、ound add r5,r5,#SIZEOF_MACHINE_DESC @ next machine_desc cmp r5,r6 blo 1b mov r5,#0 @ unknown machine 2: mov pc,lr 代码回到head.S第92行,检查atags合法性,然后创建初始页表 bl __vet_atags bl __create_page_tables 创建页表的代码在218行,首先将内核起始地址-0x4000到内核起始地址之间的16K存储器清0 __create_page_tables: pgtbl r4 @ pag
28、e table address /* * Clear the 16K level 1 swapper page table */ mov r0,r4 mov r3,#0 add r6,r0,#0x4000 1: str r3,[r0],#4 str r3,[r0],#4 str r3,[r0],#4 str r3,[r0],#4 teq r0,r6 bne 1b 然后在234行将proc_info中mmu_flags加载到r7 ldr r7,[r10,#PROCINFO_MM_MMUFLAGS] @ mm_mmuflags在24
29、2行将PC指针右移20位,得到内核第一个1MB空间的段地址存入r6,在s3c2410平台该值是0x300。接着根据此值存入映射标识 mov r6,pc,lsr #20 @ start of kernel section orr r3,r7,r6,lsl #20 @ flags + kernel base str r3,[r4,r6,lsl #2] @ identity mapping 完成页表设置后回到102行,为打开虚拟地址映射作准备。设置sp指针,函数返回地址lr指向__enable_mmu,并跳转到linux-2.6.24-moko-linuxbj/arch/ar
30、m/mm/proc-arm920.S386行,清除I-cache、D-cache、write buffer和TLB __arm920_setup: mov r0,#0 mcr p15,0,r0,c7,c7 @ invalidate I,D caches on v4 mcr p15,0,r0,c7,c10,4 @ drain write buffer on v4 #ifdef CONFIG_MMU mcr p15,0,r0,c8,c7 @ invalidate I,D TLBs on v4 #endif然后返回head.S158行,加载domain和页表,跳转到_
31、turn_mmu_on __enable_mmu: #ifdef CONFIG_ALIGNMENT_TRAP orr r0,r0,#CR_A #else bic r0,r0,#CR_A #endif #ifdef CONFIG_CPU_DCACHE_DISABLE bic r0,r0,#CR_C #endif #ifdef CONFIG_CPU_BPREDICT_DISABLE bic r0,r0,#CR_Z #endif #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0,r0,#CR_I #endif mov r
32、5,#(domain_val(DOMAIN_USER,DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL,DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE,DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO,DOMAIN_CLIENT)) mcr p15,0,r5,c3,c0,0 @ load domain access register mcr p15,0,r4,c2,c0,0 @ load page table poin
33、ter b __turn_mmu_on 在194行把mmu使能位写入mmu,激活虚拟地址。然后将原来保存在sp中的地址载入pc,跳转到head-common.S的__mmap_switched,至此代码进入虚拟地址的世界 mov r0,r0 mcr p15,0,r0,c1,c0,0 @ write control reg mrc p15,0,r3,c0,c0,0 @ read id reg mov r3,r3 mov r3,r3 mov pc,r13 在head-common.S的37行开始清除内核bss段,processor ID保存在r9,machine ID保
34、存在r1,atags地址保存在r2,并将控制寄存器保存到r7定义的内存地址。接下来跳入linux-2.6.24-moko-linuxbj/init/main.c的507行,start_kernel函数。这里只粘贴部分代码(第一个C语言函数,作一系列初始化) __mmap_switched: adr r3,__switch_data + 4 ldmia r3!,{r4,r5,r6,r7} cmp r4,r5 @ Copy data segment if needed 1: cmpne r5,r6 ldrne fp,[r4],#4 strne fp,[r5],#4
35、 bne 1b asmlinkage void __init start_kernel(void) { char * command_line; extern struct kernel_param __start___param[],__stop___param[]; smp_setup_processor_id(); /* * Need to run as early as possible,to initialize the * lockdep hash: */ lockdep_init(); debug_objects_ea
36、rly_init(); cgroup_init_early(); local_irq_disable(); early_boot_irqs_off(); early_init_irq_lock_class(); /* * Interrupts are still disabled. Do necessary setups,then * enable them */ lock_kernel(); tick_init(); boot_cpu_init(); page_address_init(); printk(KERN_NOTICE);
37、 printk(linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm,&init_task); setup_command_line(command_line); setup_per_cpu_areas(); setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ /* * Set up the scheduler prior starting any interrupts (s
38、uch as the * timer interrupt). Full topology setup happens at smp_init() * time - but meanwhile we still have a functioning scheduler. */ sched_init(); /* * Disable preemption - early bootup scheduling is extremely * fragile until we cpu_idle() for the first time. */ preempt_
39、disable(); build_all_zonelists(); page_alloc_init(); printk(KERN_NOTICE "Kernel command line:%s\n",boot_command_line); parse_early_param(); parse_args("Booting kernel",static_command_line,__start___param, __stop___param - __start___param, &unknown_bootoption); if (!irqs_disab
40、led()) { printk(KERN_WARNING "start_kernel():bug:interrupts were " "enabled *very* early,fixing it\n"); local_irq_disable(); } sort_main_extable(); trap_init(); rcu_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); pidhash_init(); init_ti
41、mers(); hrtimers_init(); softirq_init(); timekeeping_init(); time_init(); sched_clock_init(); profile_init(); if (!irqs_disabled()) printk(KERN_CRIT "start_kernel():bug:interrupts were " "enabled early\n"); early_boot_irqs_on(); local_irq_enable(); /* * HACK ALERT!T
42、his is early. We're enabling the console before * we've done PCI setups etc,and console_init() must be aware of * this. But we do want output early,in case something goes wrong. */ console_init(); if (panic_later) panic(panic_later,panic_param); lockdep_info(); /* * Need t
43、o run this when irqs are enabled,because it wants * to self-test [hard/soft]-irqs on/off lock inversion bugs * too: */ locking_selftest(); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
44、 printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " "disabling it.\n", page_to_pfn(virt_to_page((void *)initrd_start)), min_low_pfn); initrd_start = 0; } #endif vmalloc_init(); vfs_caches_init_early(); cpuset_init_early(); page_cgroup_init(); mem_ini
45、t(); enable_debug_pagealloc(); cpu_hotplug_init(); kmem_cache_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); pgtable_cache_init(); prio_tree_init();
46、anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) efi_enter_virtual_mode(); #endif thread_info_cache_init(); cred_init(); fork_init(num_physpages); proc_caches_init(); buffer_init(); key_init(); security_init(); vfs_caches_init(num_physpages); radix_tree_init(); signals_
47、init(); /* rootfs populating might need page-writeback */ page_writeback_init(); #ifdef CONFIG_PROC_FS proc_root_init(); #endif cgroup_init(); cpuset_init(); taskstats_init_early(); delayacct_init(); check_bugs(); acpi_early_init();/* before LAPIC and SMP init */ ftrace_i
48、nit(); /* Do the rest non-__init'ed,we're now alive */ rest_init(); } static noinline void __init_refok rest_init(void) __releases(kernel_lock) { int pid; kernel_thread(kernel_init,NULL,CLONE_FS | CLONE_SIGHAND); numa_default_policy(); pid = kernel_thread(kthreadd,NULL,CLONE_FS
49、 | CLONE_FILES); kthreadd_task = find_task_by_pid_ns(pid,&init_pid_ns); unlock_kernel(); /* * The boot idle thread must execute schedule() * at least once to get things moving: */ init_idle_bootup_task(current); rcu_scheduler_starting(); preempt_enable_no_resched(); schedule
50、); preempt_disable(); /* Call into cpu_idle with preempt disabled */ cpu_idle(); } static noinline int init_post(void) { /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); unlock_kernel(); mark_rodata_ro(); system_






