Up to this point things look much the same as before: the boot CPU does the bring-up. For the other cores, the system will typically have some power-management block, and waking a core means writing the appropriate registers. The address a core starts executing from once it is released is likewise architecture specific, for example a fixed physical address such as 0x00000000.
So once the boot CPU has finished initialising the basic subsystems, it can try to wake the other cores. I have not dug into every detail of this part yet, but the essence is certainly a register write to release the core, and very likely another register that holds the address of the first instruction to execute after release; all of this is easy enough to provide in hardware.
For smp_processor_id to work, the crucial step before jumping into C code is to point the stack register at init_task's stack. For the cores brought up later, the key is that each of them must be handed its own stack page. So what the boot CPU does is allocate a stack, place its address at an agreed location, and then wake a core so that it can boot cleanly.
The secondary cores still have to do some initialisation of their own, because many registers are banked per core: the MMU has to be enabled again, the stacks for the exception modes have to be configured, the core's own timer has to be initialised (along with some interrupt-controller configuration), and so on. The functions that do all of this were registered while the boot CPU was initialising, so a later core only needs to apply the configuration; most of the work has already been done. Of course, it also has to notify the boot CPU that it has finished, so that the boot CPU can go on and wake the next core.
Main CPU
Assembly stage
Let's start from the boot CPU's point of view. This code has been analysed countless times elsewhere, so I will keep the description as brief as possible. The key steps are:
Determine the processor type and run its specific initialisation
Enable the MMU
Unpack the .data section
Zero the .bss section
Initialise the stack register (pointing it at init_task's stack)
None of these need much elaboration; let me just mention how the first one is done.
/arch/arm/kernel/head.S
	__HEAD
ENTRY(stext)
	mrc	p15, 0, r9, c0, c0		@ get processor id
	bl	__lookup_processor_type		@ r5=procinfo r9=cpuid
	movs	r10, r5				@ invalid processor (r5=0)?
	...
	@ before turn on MMU
	add	pc, r10, #PROCINFO_INITFUNC
	...
struct proc_info_list {
	...
	unsigned long		__cpu_mm_mmu_flags;	/* used by head.S */
	unsigned long		__cpu_io_mmu_flags;	/* used by head.S */
	unsigned long		__cpu_flush;		/* used by head.S */
	...
};
/*
 * Read processor ID register (CP#15, CR0), and look up in the linker-built
 * supported processor list.  Note that we can't use the absolute addresses
 * for the __proc_info lists since we aren't running with the MMU on
 * (and therefore, we are not in the correct address space).  We have to
 * calculate the offset.
 *
 *	r9 = cpuid
 * Returns:
 *	r3, r4, r6 corrupted
 *	r5 = proc_info pointer in physical address space
 *	r9 = cpuid (preserved)
 */
	__CPUINIT
__lookup_processor_type:
	adr	r3, __lookup_processor_type_data
	ldmia	r3, {r4 - r6}
	sub	r3, r3, r4		@ get offset between virt&phys
	add	r5, r5, r3		@ convert virt addresses to
	add	r6, r6, r3		@ physical address space
1:	ldmia	r5, {r3, r4}		@ value, mask
	and	r4, r4, r9		@ mask wanted bits
	teq	r3, r4
	beq	2f
	add	r5, r5, #PROC_INFO_SZ	@ sizeof(proc_info_list)
	cmp	r5, r6
	blo	1b
	mov	r5, #0			@ unknown processor
2:	mov	pc, lr
ENDPROC(__lookup_processor_type)

/*
 * Look in <asm/procinfo.h> for information about the __proc_info structure.
 */
	.align	2
	.type	__lookup_processor_type_data, %object
__lookup_processor_type_data:
	.long	.
	.long	__proc_info_begin
	.long	__proc_info_end
	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
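If the position-independent tricks make the loop hard to read, the following C rendering may help. It is only a sketch of my own: the real code runs with the MMU off, so it has to convert the __proc_info_begin/__proc_info_end addresses to physical addresses by hand, which is what the sub/add fix-ups at the top of the assembly do. The field names follow struct proc_info_list in <asm/procinfo.h>.

extern struct proc_info_list __proc_info_begin[], __proc_info_end[];

/* C equivalent of __lookup_processor_type (sketch; MMU-off fixups omitted) */
static struct proc_info_list *lookup_processor_type(unsigned int cpuid)
{
	struct proc_info_list *info;

	/* Walk the linker-built table of supported processors. */
	for (info = __proc_info_begin; info < __proc_info_end; info++)
		if ((cpuid & info->cpu_mask) == info->cpu_val)
			return info;

	return NULL;		/* unknown processor (r5 = 0 in the asm) */
}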
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
{
	int ret;

	/*
	 * We need to tell the secondary core where to find
	 * its stack and the page tables.
	 */
	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
	secondary_data.pgdir = virt_to_phys(idmap_pgd);
	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));

	/*
	 * Now bring the CPU into our world.
	 */
	ret = boot_secondary(cpu, idle);
	if (ret == 0) {
		/*
		 * CPU was successfully started, wait for it
		 * to come online or time out.
		 */
		wait_for_completion_timeout(&cpu_running,
					    msecs_to_jiffies(1000));
	}

	secondary_data.stack = NULL;
	secondary_data.pgdir = 0;

	return ret;
}
The global variables assigned above are exactly what the secondary core will read as it boots. One thing worth noticing: Linux derives a CPU's id from its stack, and that id is set entirely by the boot CPU, because the boot CPU allocated all of those stacks in the first place (or, equivalently, the idle tasks; the two are tightly bound together).
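It is worth spelling this out. Before __cpu_up runs, the boot CPU has already obtained an idle task for the target CPU and stamped the logical CPU number into its thread_info. The sketch below condenses that: idle_thread_get() lives in kernel/smpboot.c and the ->cpu assignment is effectively what init_idle() does, but take the exact call sites as an approximation rather than the literal code of this kernel version.

/* Condensed sketch of what the boot CPU prepares for CPU `cpu`. */
static void prepare_secondary(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);  /* one idle task per CPU */

	/* init_idle() has already recorded the logical id in the idle
	 * task's thread_info; shown explicitly here for emphasis. */
	task_thread_info(idle)->cpu = cpu;

	/* Publish the idle stack for the secondary, as __cpu_up() above does. */
	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
}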
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	if (smp_ops.smp_boot_secondary)
		return smp_ops.smp_boot_secondary(cpu, idle);
	return -ENOSYS;
}
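As one concrete example of smp_ops.smp_boot_secondary, on platforms whose secondaries are started through PSCI firmware it ends up looking roughly like the following. This is a sketch modelled on arch/arm/kernel/psci_smp.c; the exact header locations and whether __pa() or virt_to_idmap() is used vary between kernel versions.

#include <linux/smp.h>
#include <asm/smp.h>		/* struct smp_operations */
#include <asm/smp_plat.h>	/* cpu_logical_map() */
#include <asm/psci.h>		/* psci_ops (location varies by version) */

extern void secondary_startup(void);

static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	if (psci_ops.cpu_on)
		/* Ask the firmware to power the core on and start it at
		 * secondary_startup (passed as a physical address). */
		return psci_ops.cpu_on(cpu_logical_map(cpu),
				       __pa(secondary_startup));

	return -ENODEV;
}

struct smp_operations psci_smp_ops __initdata = {
	.smp_boot_secondary	= psci_boot_secondary,
};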
Whatever the platform, the end result is the same: the newly woken CPU starts executing secondary_startup. Leaving the platform specifics aside for now, let's turn to the secondary cores themselves.
Secondary CPU
The code below is reordered slightly to make it easier to follow.
/*
 * Initial data for bringing up a secondary CPU.
 */
struct secondary_data {
	unsigned long pgdir;
	unsigned long swapper_pg_dir;
	void *stack;
};
extern struct secondary_data secondary_data;
	.type	__secondary_data, %object
__secondary_data:
	.long	.
	.long	secondary_data
	.long	__secondary_switched

ENTRY(secondary_startup)
	/*
	 * Common entry point for secondary CPUs.
	 *
	 * Ensure that we're in SVC mode, and IRQs are disabled.  Lookup
	 * the processor type - there is no need to check the machine type
	 * as it has already been validated by the primary processor.
	 */
	safe_svcmode_maskall r9

	mrc	p15, 0, r9, c0, c0		@ get processor id
	bl	__lookup_processor_type
	movs	r10, r5				@ invalid processor?
	moveq	r0, #'p'			@ yes, error 'p'
	beq	__error_p

	/*
	 * Use the page tables supplied from __cpu_up.
	 */
	adr	r4, __secondary_data
	ldmia	r4, {r5, r7, r12}		@ address to jump to after
	sub	lr, r4, r5			@ mmu has been enabled
	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
	add	r7, r7, #4
	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
	adr	lr, BSYM(__enable_mmu)		@ return address
	mov	r13, r12			@ __secondary_switched address
 ARM(	add	pc, r10, #PROCINFO_INITFUNC	) @ initialise processor
						  @ (return control reg)
ENDPROC(secondary_startup)

ENTRY(__secondary_switched)
	ldr	sp, [r7, #4]			@ get secondary_data.stack
	mov	fp, #0
	b	secondary_start_kernel
ENDPROC(__secondary_switched)
So, once the MMU has been enabled, we finally arrive at secondary_start_kernel.
/*
 * This is the secondary CPU boot entry.  We're using this CPUs
 * idle thread stack, but a set of temporary page tables.
 */
asmlinkage void __cpuinit secondary_start_kernel(void)
{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu;

	/*
	 * The identity mapping is uncached (strongly ordered), so
	 * switch away from it before attempting any exclusive accesses.
	 */
	cpu_switch_mm(mm->pgd, mm);
	local_flush_bp_all();
	enter_lazy_tlb(mm, current);
	local_flush_tlb_all();

	cpu = smp_processor_id();
	...
	cpu_init();

	/*
	 * Give the platform a chance to do its own initialisation.
	 */
	if (smp_ops.smp_secondary_init)
		smp_ops.smp_secondary_init(cpu);

	notify_cpu_starting(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue.  Wait for
	 * the CPU migration code to notice that the CPU is online
	 * before we continue - which happens after __cpu_up returns.
	 */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	/*
	 * Setup the percpu timer for this CPU.
	 */
	percpu_timer_setup();

	local_irq_enable();
	local_fiq_enable();

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_ONLINE);
}
A few key points:
MMU-related setup
On ARM the sp registers of the other exception modes also have to be initialised (this is part of what cpu_init() does)
notify_cpu_starting invokes the initialisation callbacks that other subsystems registered earlier (a registration sketch follows this list)
Notify the boot CPU that initialisation is complete; remember the boot CPU is still waiting at this point
Enter the idle loop; the idle framework is worth exploring in its own right, but it is not the focus here
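As a concrete illustration of the notify_cpu_starting point, this is roughly how another subsystem hooks that event. The sketch uses the old notifier-based API (register_cpu_notifier / CPU_STARTING) that matches the kernel vintage quoted in this article; newer kernels use the cpuhp state machine instead.

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/printk.h>

static int my_cpu_callback(struct notifier_block *nb,
			   unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_STARTING:
		/* Runs on the new CPU itself, interrupts still disabled:
		 * a natural place for per-CPU timer or interrupt setup. */
		pr_info("cpu %u is starting\n", cpu);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
	.notifier_call = my_cpu_callback,
};

static int __init my_smp_hook_init(void)
{
	register_cpu_notifier(&my_cpu_nb);
	return 0;
}
early_initcall(my_smp_hook_init);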
These steps ensure that a CPU brought up later also has its own timer, can take part in process scheduling, and so on; from then on it is no different from the boot CPU.
There is an interesting question here: why can a freshly started CPU call smp_processor_id right away? Precisely because its stack (or, put differently, its idle task) was assigned by the boot CPU, and so was its CPU id. This id is a logical one; every CPU also has a hardware id register, but Linux uses the logical id because it hides the architecture: hardware CPU ids look wildly different across architectures (on ARM it is the MPIDR register).
In other words, the boot CPU takes id 0 by default; for each CPU it wakes it picks the next number, creates the idle task for it (cloned from init_task), and fills in thread_info->cpu. It is entirely up to the boot CPU. The sketch below shows how that id is read back.
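This is a simplified rendering of arch/arm/include/asm/thread_info.h and the generic smp_processor_id() plumbing of the same era (a sketch, not verbatim): thread_info sits at the bottom of the THREAD_SIZE-aligned stack, so masking the stack pointer recovers it, and the ->cpu field inside it is precisely what the boot CPU filled in.

/* Simplified, for illustration only. */
#define THREAD_SIZE	8192

struct thread_info {
	/* ... */
	__u32 cpu;		/* logical cpu number, set by the boot CPU */
	/* ... */
};

static inline struct thread_info *current_thread_info(void)
{
	register unsigned long sp asm ("sp");
	/* thread_info lives at the bottom of the current stack */
	return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}

#define raw_smp_processor_id()	(current_thread_info()->cpu)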
When Linux asks the firmware to power a core on (the PSCI cpu_on call above), the request is issued with an SMC or HVC instruction following the SMC Calling Convention (SMCCC). The kernel-side wrappers are declared like this:
/**
 * __arm_smccc_hvc() - make HVC calls
 * @a0-a7: arguments passed in registers 0 to 7
 * @res: result values from registers 0 to 3
 * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
 *
 * This function is used to make HVC calls following SMC Calling
 * Convention.  The content of the supplied param are copied to registers 0
 * to 7 prior to the HVC instruction. The return values are updated with
 * the content from register 0 to 3 on return from the HVC instruction. An
 * optional quirk structure provides vendor specific behavior.
 */
asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
			unsigned long a2, unsigned long a3, unsigned long a4,
			unsigned long a5, unsigned long a6, unsigned long a7,
			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);

#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
As you might guess, these are implemented in assembly.
/arch/arm/kernel/smccc-call.S
/*
 * Wrap c macros in asm macros to delay expansion until after the
 * SMCCC asm macro is expanded.
 */
	.macro SMCCC_SMC
	__SMC(0)
	.endm

	.macro SMCCC instr
UNWIND(	.fnstart)
	mov	r12, sp
	push	{r4-r7}
UNWIND(	.save	{r4-r7})
	ldm	r12, {r4-r7}		@ r0-r7 8 args
	\instr
	pop	{r4-r7}
	ldr	r12, [sp, #(4 * 4)]	@ now r12 points at &res
	stm	r12, {r0-r3}
	bx	lr
UNWIND(	.fnend)
	.endm

/*
 * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
 *		  unsigned long a3, unsigned long a4, unsigned long a5,
 *		  unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
 *		  struct arm_smccc_quirk *quirk)
 */
ENTRY(__arm_smccc_smc)
	SMCCC SMCCC_SMC
ENDPROC(__arm_smccc_smc)
SMC here is a macro with separate Thumb and ARM variants, because the two instruction sets encode it differently. To understand the operations above you need the ARM calling convention; I wrote a brief introduction to it a while ago, but the one picture that matters is this: when a function is entered via bl, the first four arguments are in r0-r3 and sp points at the fifth argument on the stack. With that in mind the code above is natural: r12 keeps a pointer to the stacked arguments so a4-a7 can be loaded into r4-r7, and after the call the ninth argument (the address of res) is fetched from the stack so r0-r3 can be stored into it.
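To make the convention concrete, here is roughly how a PSCI CPU_ON request could be issued through this wrapper. PSCI_0_2_FN_CPU_ON comes from include/uapi/linux/psci.h; the in-kernel PSCI driver wraps this slightly differently, so treat it as an illustrative sketch. a0 carries the function id, a1-a3 the PSCI arguments, and &res is the ninth C argument, which is exactly the stacked value the assembly fetches with ldr r12, [sp, #(4 * 4)].

#include <linux/arm-smccc.h>
#include <uapi/linux/psci.h>

/* Illustrative only: ask the firmware to power on the core identified by
 * target_mpidr and start it at entry_point (a physical address). */
static int cpu_on_via_smccc(unsigned long target_mpidr, unsigned long entry_point)
{
	struct arm_smccc_res res;

	/* a0 = function id, a1..a3 = PSCI arguments, a4..a7 unused;
	 * &res is the 9th argument and therefore travels on the stack. */
	arm_smccc_smc(PSCI_0_2_FN_CPU_ON, target_mpidr, entry_point, 0,
		      0, 0, 0, 0, &res);

	return (int)res.a0;	/* 0 (PSCI_RET_SUCCESS) on success */
}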
I have not yet analysed the firmware that sits on the other side of these calls (ARM Trusted Firmware), but the picture is very similar to an operating system on x86. ARM used not to place a privileged shielding layer over resources such as memory and system registers; now there is one, and returning from it is just an eret instruction, very much in the same spirit as x86. The code below comes from its git repository, arm-trusted-firmware.
/*
 * Top-level Standard Service SMC handler. This handler will in turn dispatch
 * calls to PSCI SMC handler
 */
static uintptr_t std_svc_smc_handler(uint32_t smc_fid,
				     u_register_t x1,
				     u_register_t x2,
				     u_register_t x3,
				     u_register_t x4,
				     void *cookie,
				     void *handle,
				     u_register_t flags)
{
	/*
	 * Dispatch PSCI calls to PSCI SMC handler and return its return
	 * value
	 */
	if (is_psci_fid(smc_fid)) {
		uint64_t ret;

		ret = psci_smc_handler(smc_fid, x1, x2, x3, x4,
				       cookie, handle, flags);

#if ENABLE_RUNTIME_INSTRUMENTATION
		PMF_CAPTURE_TIMESTAMP(rt_instr_svc, RT_INSTR_EXIT_PSCI,
				      PMF_NO_CACHE_MAINT);
#endif

		SMC_RET1(handle, ret);
	}
	...
}
arm-trusted-firmware/lib/psci/psci_main.c
/*******************************************************************************
 * PSCI top level handler for servicing SMCs.
 ******************************************************************************/
u_register_t psci_smc_handler(uint32_t smc_fid,
			      u_register_t x1,
			      u_register_t x2,
			      u_register_t x3,
			      u_register_t x4,
			      void *cookie,
			      void *handle,
			      u_register_t flags)
{
	u_register_t ret;

	if (is_caller_secure(flags))
		return (u_register_t)SMC_UNK;

	/* Check the fid against the capabilities */
	if ((psci_caps & define_psci_cap(smc_fid)) == 0U)
		return (u_register_t)SMC_UNK;

	if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) {
		/* 32-bit PSCI function, clear top parameter bits */
		uint32_t r1 = (uint32_t)x1;
		uint32_t r2 = (uint32_t)x2;
		uint32_t r3 = (uint32_t)x3;

		switch (smc_fid) {
		case PSCI_VERSION:
			ret = (u_register_t)psci_version();
			break;

		case PSCI_CPU_OFF:
			ret = (u_register_t)psci_cpu_off();
			break;

		case PSCI_CPU_SUSPEND_AARCH32:
			ret = (u_register_t)psci_cpu_suspend(r1, r2, r3);
			break;

		case PSCI_CPU_ON_AARCH32:
			ret = (u_register_t)psci_cpu_on(r1, r2, r3);
			break;
		...
		}
	}
	...
}
In the same file we also find:
/*******************************************************************************
 * PSCI frontend api for servicing SMCs. Described in the PSCI spec.
 ******************************************************************************/
int psci_cpu_on(u_register_t target_cpu,
		uintptr_t entrypoint,
		u_register_t context_id)
{
	int rc;
	entry_point_info_t ep;

	/* Determine if the cpu exists of not */
	rc = psci_validate_mpidr(target_cpu);
	if (rc != PSCI_E_SUCCESS)
		return PSCI_E_INVALID_PARAMS;

	/* Validate the entry point and get the entry_point_info */
	rc = psci_validate_entry_point(&ep, entrypoint, context_id);
	if (rc != PSCI_E_SUCCESS)
		return rc;

	/*
	 * To turn this cpu on, specify which power
	 * levels need to be turned on
	 */
	return psci_cpu_on_start(target_cpu, &ep);
}
/lib/psci/psci_on.c
/*******************************************************************************
 * Generic handler which is called to physically power on a cpu identified by
 * its mpidr. It performs the generic, architectural, platform setup and state
 * management to power on the target cpu e.g. it will ensure that
 * enough information is stashed for it to resume execution in the non-secure
 * security state.
 *
 * The state of all the relevant power domains are changed after calling the
 * platform handler as it can return error.
 ******************************************************************************/
int psci_cpu_on_start(u_register_t target_cpu,
		      const entry_point_info_t *ep)
{
	int rc;
	aff_info_state_t target_aff_state;
	int target_idx = plat_core_pos_by_mpidr(target_cpu);

	/* Protect against multiple CPUs trying to turn ON the same target CPU */
	psci_spin_lock_cpu(target_idx);

	/*
	 * Generic management: Ensure that the cpu is off to be
	 * turned on.
	 * Perform cache maintanence ahead of reading the target CPU state to
	 * ensure that the data is not stale.
	 * There is a theoretical edge case where the cache may contain stale
	 * data for the target CPU data - this can occur under the following
	 * conditions:
	 * - the target CPU is in another cluster from the current
	 * - the target CPU was the last CPU to shutdown on its cluster
	 * - the cluster was removed from coherency as part of the CPU shutdown
	 *
	 * In this case the cache maintenace that was performed as part of the
	 * target CPUs shutdown was not seen by the current CPU's cluster. And
	 * so the cache may contain stale data for the target CPU.
	 */
	flush_cpu_data_by_index((unsigned int)target_idx,
				psci_svc_cpu_data.aff_info_state);
	rc = cpu_on_validate_state(psci_get_aff_info_state_by_idx(target_idx));
	if (rc != PSCI_E_SUCCESS)
		goto exit;

	/*
	 * Call the cpu on handler registered by the Secure Payload Dispatcher
	 * to let it do any bookeeping. If the handler encounters an error, it's
	 * expected to assert within
	 */
	if ((psci_spd_pm != NULL) && (psci_spd_pm->svc_on != NULL))
		psci_spd_pm->svc_on(target_cpu);

	/*
	 * Set the Affinity info state of the target cpu to ON_PENDING.
	 * Flush aff_info_state as it will be accessed with caches
	 * turned OFF.
	 */
	psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING);
	flush_cpu_data_by_index((unsigned int)target_idx,
				psci_svc_cpu_data.aff_info_state);

	/*
	 * The cache line invalidation by the target CPU after setting the
	 * state to OFF (see psci_do_cpu_off()), could cause the update to
	 * aff_info_state to be invalidated. Retry the update if the target
	 * CPU aff_info_state is not ON_PENDING.
	 */
	target_aff_state = psci_get_aff_info_state_by_idx(target_idx);
	if (target_aff_state != AFF_STATE_ON_PENDING) {
		assert(target_aff_state == AFF_STATE_OFF);
		psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING);
		flush_cpu_data_by_index((unsigned int)target_idx,
					psci_svc_cpu_data.aff_info_state);

		assert(psci_get_aff_info_state_by_idx(target_idx) ==
		       AFF_STATE_ON_PENDING);
	}

	/*
	 * Perform generic, architecture and platform specific handling.
	 */
	/*
	 * Plat. management: Give the platform the current state
	 * of the target cpu to allow it to perform the necessary
	 * steps to power on.
	 */
	rc = psci_plat_pm_ops->pwr_domain_on(target_cpu);
	assert((rc == PSCI_E_SUCCESS) || (rc == PSCI_E_INTERN_FAIL));

	if (rc == PSCI_E_SUCCESS)
		/* Store the re-entry information for the non-secure world. */
		cm_init_context_by_index((unsigned int)target_idx, ep);
	else {
		/* Restore the state on error. */
		psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_OFF);
		flush_cpu_data_by_index((unsigned int)target_idx,
					psci_svc_cpu_data.aff_info_state);
	}

exit:
	psci_spin_unlock_cpu(target_idx);
	return rc;
}
So on today's ARMv8 systems, waking a CPU is implemented by ATF, and at its heart it is a power-management operation. I will fill in more detail here when I find the time.
Summary
The key point is that the address at which a later-woken CPU starts executing is well defined. That matters enormously: following the path the boot CPU laid out for it, thread_info->cpu was fixed before the CPU was even woken, so the moment it comes up it can call smp_processor_id and use the result as its offset into per-CPU variables. I find this rather elegant, yet few people seem to mention it. A small illustration follows.
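As a closing illustration (variable and function names are hypothetical), this is all it takes for a freshly booted secondary to touch its own per-CPU data, and it only works because thread_info->cpu was prepared for it in advance.

#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/jiffies.h>

static DEFINE_PER_CPU(unsigned long, bringup_stamp);

/* Could be called early in secondary_start_kernel(): safe as soon as the
 * CPU is running on its own idle stack, because smp_processor_id() simply
 * reads the thread_info->cpu value the boot CPU already filled in. */
static void mark_bringup(void)
{
	per_cpu(bringup_stamp, smp_processor_id()) = jiffies;
}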