Up to this point everything follows the same pattern: the boot CPU does the bring-up. For the other cores, the SoC usually has a power-management block, and waking a core comes down to writing the right registers; the address a core starts executing from once it is released is likewise architecture specific, for example physical address 0x00000000 by default.
So once the boot CPU has finished the basic subsystem initialisation, it can try to wake the other cores. I have not dug into the hardware details of this part yet, but the key is certainly a register write to enable the core, possibly along with a register holding the address of the first instruction it should execute once enabled.
For smp_processor_id to work, the crucial step before jumping into C code is to point the stack register at init_task. When the other cores are brought up, the key point is that each later CPU gets a stack of its own; so what the boot CPU does is allocate the stack pages, place their address in a well-known location, and then wake one core so that it can boot cleanly.
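To make this concrete, here is a minimal sketch of how 32-bit ARM kernels of this era recover the CPU id from the stack pointer (simplified from arch/arm/include/asm/thread_info.h and asm/smp.h; the field layout is abbreviated and THREAD_SIZE is simply assumed to be 8 KiB here):
#define THREAD_SIZE     8192            /* assumption for this sketch */

struct thread_info {
        unsigned long   flags;
        int             preempt_count;
        /* ... */
        unsigned int    cpu;            /* stamped by the boot CPU before wake-up */
        /* ... */
};

static inline struct thread_info *current_thread_info(void)
{
        register unsigned long sp asm ("sp");

        /* thread_info sits at the bottom of the THREAD_SIZE-aligned kernel stack */
        return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}

/* in the non-debug case smp_processor_id() boils down to this */
#define raw_smp_processor_id()  (current_thread_info()->cpu)
So as soon as a CPU is running on a stack whose thread_info was prepared for it, smp_processor_id just works.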
The secondary cores still have to do some initialisation of their own, because many registers are banked per core: they still have to turn on the MMU, set up the stacks used in the exception modes, initialise their local timer (including some per-CPU interrupt-controller configuration), and so on. The corresponding init functions were all registered while the boot CPU was doing its own initialisation, so a later core only needs to apply some configuration; most of the work is already done. Of course, it also has to tell the boot CPU that it has finished initialising, so that the boot CPU can move on and wake the next core.
Main CPU
The assembly stage
Let's start from the boot CPU's point of view. This code has been analysed in plenty of places already, so I will keep the description as brief as possible. The key points are:
Identify the processor type and run its specific initialisation
Enable the MMU
Copy the .data section into place
Zero the .bss section
Initialise the stack pointer (pointing at init_task)
There is no need to belabour these; let me just touch on how the first point is implemented.
/arch/arm/kernel/head.S
__HEAD
ENTRY(stext)
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type @ r5=procinfo r9=cpuid
movs r10, r5 @ invalid processor (r5=0)?
...
@ before turn on MMU
add pc, r10, #PROCINFO_INITFUNC
....
struct proc_info_list {
...
unsigned long __cpu_mm_mmu_flags; /* used by head.S */
unsigned long __cpu_io_mmu_flags; /* used by head.S */
unsigned long __cpu_flush; /* used by head.S */
...
};
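A side note on the add pc, r10, #PROCINFO_INITFUNC above: the constant is not magic, it is generated at build time from this struct. A hedged sketch of the relevant entries, paraphrased from arch/arm/kernel/asm-offsets.c of that era:
/* asm-offsets turns C struct layouts into constants usable from .S files */
DEFINE(PROCINFO_INITFUNC,       offsetof(struct proc_info_list, __cpu_flush));
DEFINE(PROCINFO_MM_MMUFLAGS,    offsetof(struct proc_info_list, __cpu_mm_mmu_flags));
DEFINE(PROCINFO_IO_MMUFLAGS,    offsetof(struct proc_info_list, __cpu_io_mmu_flags));
So the jump simply lands on the __cpu_flush setup routine of the proc_info_list entry that matched the CPU id.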
/*
* Read processor ID register (CP#15, CR0), and look up in the linker-built
* supported processor list. Note that we can't use the absolute addresses
* for the __proc_info lists since we aren't running with the MMU on
* (and therefore, we are not in the correct address space). We have to
* calculate the offset.
*
* r9 = cpuid
* Returns:
* r3, r4, r6 corrupted
* r5 = proc_info pointer in physical address space
* r9 = cpuid (preserved)
*/
__CPUINIT
__lookup_processor_type:
adr r3, __lookup_processor_type_data
ldmia r3, {r4 - r6}
sub r3, r3, r4 @ get offset between virt&phys
add r5, r5, r3 @ convert virt addresses to
add r6, r6, r3 @ physical address space
1: ldmia r5, {r3, r4} @ value, mask
and r4, r4, r9 @ mask wanted bits
teq r3, r4
beq 2f
add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list)
cmp r5, r6
blo 1b
mov r5, #0 @ unknown processor
2: mov pc, lr
ENDPROC(__lookup_processor_type)
/*
* Look in <asm/procinfo.h> for information about the __proc_info structure.
*/
.align 2
.type __lookup_processor_type_data, %object
__lookup_processor_type_data:
.long .
.long __proc_info_begin
.long __proc_info_end
.size __lookup_processor_type_data, . - __lookup_processor_type_data
That covers how the boot CPU identifies itself and gets going. On the C side, once the system is far enough along, each additional core is brought up through __cpu_up (arch/arm/kernel/smp.c):
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
/*
* We need to tell the secondary core where to find
* its stack and the page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
secondary_data.pgdir = virt_to_phys(idmap_pgd);
secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
/*
* Now bring the CPU into our world.
*/
ret = boot_secondary(cpu, idle);
if (ret == 0) {
/*
* CPU was successfully started, wait for it
* to come online or time out.
*/
wait_for_completion_timeout(&cpu_running,
msecs_to_jiffies(1000));
}
secondary_data.stack = NULL;
secondary_data.pgdir = 0;
return ret;
}
The global variable filled in above is exactly what the other cores will read as they boot. One thing worth noting: Linux derives the CPU id from the stack, and that id is always set by the boot CPU, because the boot CPU is the one that allocated their stacks (or, equivalently, their idle tasks; the stack and the task are tightly coupled).
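Where does that id come from? Before __cpu_up() runs, the generic hotplug code has already forked an idle task for the target CPU and stamped the CPU number into its thread_info. A rough, paraphrased sketch of the boot-CPU side (the real path goes through idle_thread_get() and init_idle()/__set_task_cpu() in kernel/sched/core.c):
struct task_struct *idle = idle_thread_get(cpu);        /* per-CPU idle task       */
task_thread_info(idle)->cpu = cpu;                      /* done inside init_idle() */

/* __cpu_up() then publishes that very task's stack for the secondary to use */
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
With that in place, __cpu_up hands the actual kick over to the platform hook: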
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
if (smp_ops.smp_boot_secondary)
return smp_ops.smp_boot_secondary(cpu, idle);
return -ENOSYS;
}
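smp_ops here is the per-platform hook table; abridged from arch/arm/include/asm/smp.h (hotplug callbacks omitted), it looks roughly like this:
struct smp_operations {
#ifdef CONFIG_SMP
        /* discover the CPUs and mark them possible */
        void (*smp_init_cpus)(void);
        /* boot-CPU side preparation before any secondary is started */
        void (*smp_prepare_cpus)(unsigned int max_cpus);
        /* runs on the secondary itself, from secondary_start_kernel */
        void (*smp_secondary_init)(unsigned int cpu);
        /* runs on the boot CPU: actually kick the given CPU */
        int  (*smp_boot_secondary)(unsigned int cpu, struct task_struct *idle);
        /* CPU hotplug callbacks omitted */
#endif
};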
From here on it becomes platform specific, but every implementation ultimately gets the new CPU executing secondary_startup. Before looking at a concrete platform, let's focus on what the secondary core itself does.
Secondary CPU
The code below has been reordered a little to make it easier to follow.
/*
* Initial data for bringing up a secondary CPU.
*/
struct secondary_data {
unsigned long pgdir;
unsigned long swapper_pg_dir;
void *stack;
};
extern struct secondary_data secondary_data;
.type __secondary_data, %object
__secondary_data:
.long .
.long secondary_data
.long __secondary_switched
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
*
* Ensure that we're in SVC mode, and IRQs are disabled. Lookup
* the processor type - there is no need to check the machine type
* as it has already been validated by the primary processor.
*/
safe_svcmode_maskall r9
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type
movs r10, r5 @ invalid processor?
moveq r0, #'p' @ yes, error 'p'
beq __error_p
/*
* Use the page tables supplied from __cpu_up.
*/
adr r4, __secondary_data
ldmia r4, {r5, r7, r12} @ address to jump to after
sub lr, r4, r5 @ mmu has been enabled
ldr r4, [r7, lr] @ get secondary_data.pgdir
add r7, r7, #4
ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir
adr lr, BSYM(__enable_mmu) @ return address
mov r13, r12 @ __secondary_switched address
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
@ (return control reg)
ENDPROC(secondary_startup)
ENTRY(__secondary_switched)
ldr sp, [r7, #4] @ get secondary_data.stack
mov fp, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
So, once the MMU is enabled, the secondary CPU finally arrives at secondary_start_kernel:
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
*/
asmlinkage void __cpuinit secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
unsigned int cpu;
/*
* The identity mapping is uncached (strongly ordered), so
* switch away from it before attempting any exclusive accesses.
*/
cpu_switch_mm(mm->pgd, mm);
local_flush_bp_all();
enter_lazy_tlb(mm, current);
local_flush_tlb_all();
cpu = smp_processor_id();
...
cpu_init();
/*
* Give the platform a chance to do its own initialisation.
*/
if (smp_ops.smp_secondary_init)
smp_ops.smp_secondary_init(cpu);
notify_cpu_starting(cpu);
/*
* OK, now it's safe to let the boot CPU continue. Wait for
* the CPU migration code to notice that the CPU is online
* before we continue - which happens after __cpu_up returns.
*/
set_cpu_online(cpu, true);
complete(&cpu_running);
/*
* Setup the percpu timer for this CPU.
*/
percpu_timer_setup();
local_irq_enable();
local_fiq_enable();
/*
* OK, it's off to the idle thread for us
*/
cpu_startup_entry(CPUHP_ONLINE);
}
A few key points:
MMU-related setup
On ARM, the sp registers of the other exception modes also have to be initialised (see the sketch below)
notify_cpu_starting invokes the init callbacks registered by other subsystems
Notify the boot CPU that initialisation is done; remember the boot CPU is waiting on exactly this
Enter the idle loop; the idle framework is worth exploring in its own right, but it is not the focus here
This initialisation guarantees that a CPU brought up later also has its own timer, can take part in process scheduling, and so on; at that point it is no different from the main CPU.
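For the point about the other exception modes' sp registers, cpu_init() gives every CPU a tiny stack per mode and briefly switches into IRQ/ABT/UND mode to load the banked sp. A simplified sketch of the data involved, from arch/arm/kernel/setup.c of that era (the mode-switching inline asm is omitted):
struct stack {
        u32 irq[3];
        u32 abt[3];
        u32 und[3];
} ____cacheline_aligned;

static struct stack stacks[NR_CPUS];

/*
 * cpu_init() roughly does, for each mode: write CPSR to enter the mode,
 * point the banked sp at the matching array above, then return to SVC.
 */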
There is an interesting question here: why can a CPU that has only just come up already use smp_processor_id? Precisely because its stack (in other words its idle task) was assigned by the boot CPU, and so was its CPU id. This id is of course a logical one; the CPU also has its own hardware id register, but Linux uses the former because it hides the architecture: hardware CPU ids vary wildly between architectures (on ARM it is the MPIDR register).
In other words, the boot CPU takes id 0 by default, and for each CPU it wakes it simply hands out the next id and creates the idle task for it; filling in thread_info->cpu is entirely up to the boot CPU.
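The mapping between the logical id Linux uses and the hardware MPIDR lives in a small table. A hedged sketch of how it is seeded on the boot CPU (simplified from arch/arm/kernel/setup.c and asm/smp_plat.h; details differ between kernel versions, and the trailing printk/per-CPU-offset setup is omitted):
extern u32 __cpu_logical_map[];
#define cpu_logical_map(cpu)    __cpu_logical_map[cpu]

void __init smp_setup_processor_id(void)
{
        int i;
        u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
        u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);

        cpu_logical_map(0) = cpu;       /* the boot CPU becomes logical CPU 0 */
        for (i = 1; i < nr_cpu_ids; ++i)
                cpu_logical_map(i) = i == cpu ? 0 : i;
}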
The wake-up flow on a PSCI platform
Below I take the wake-up flow of the platform at my internship as an example to see how this actually plays out, picking up from the smp_ops we just looked at.
void __init smp_set_ops(struct smp_operations *ops)
{
if (ops)
smp_ops = *ops;
};
...
...
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
if (smp_ops.smp_boot_secondary)
return smp_ops.smp_boot_secondary(cpu, idle);
return -ENOSYS;
}
Meanwhile, in another place:
/arch/arm/kernel/setup.c
if (is_smp()) {
if (!mdesc->smp_init || !mdesc->smp_init()) {
if (psci_smp_available())
smp_set_ops(&psci_smp_ops);
else if (mdesc->smp)
smp_set_ops(mdesc->smp);
}
smp_init_cpus();
smp_build_mpidr_hash();
}
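The psci_smp_ops registered above is what wires boot_secondary into a PSCI CPU_ON call: the boot CPU asks the firmware to power on the target core and passes the physical address of secondary_startup as its entry point. Roughly, paraphrased from arch/arm/kernel/psci_smp.c (the exact entry-point conversion, __pa versus virt_to_idmap, depends on the kernel version):
static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle)
{
        if (psci_ops.cpu_on)
                return psci_ops.cpu_on(cpu_logical_map(cpu),
                                       virt_to_idmap(&secondary_startup));
        return -ENODEV;
}

struct smp_operations psci_smp_ops __initdata = {
        .smp_boot_secondary     = psci_boot_secondary,
};
psci_ops.cpu_on in turn points at the psci_cpu_on shown below, which issues the actual SMC (or HVC) into the firmware: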
...
static psci_fn *invoke_psci_fn;
...
static unsigned long __invoke_psci_fn_smc(unsigned long function_id,
unsigned long arg0, unsigned long arg1,
unsigned long arg2)
{
struct arm_smccc_res res;
arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res);
return res.a0;
}
static void set_conduit(enum psci_conduit conduit)
{
switch (conduit) {
case PSCI_CONDUIT_HVC:
invoke_psci_fn = __invoke_psci_fn_hvc;
break;
case PSCI_CONDUIT_SMC:
invoke_psci_fn = __invoke_psci_fn_smc;
break;
default:
WARN(1, "Unexpected PSCI conduit %d\n", conduit);
}
psci_ops.conduit = conduit;
}
static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
{
int err;
u32 fn;
fn = psci_function_id[PSCI_FN_CPU_ON];
err = invoke_psci_fn(fn, cpuid, entry_point, 0);
return psci_to_linux_errno(err);
}
Which brings us here:
/include/linux/arm-smccc.h
/**
* __arm_smccc_hvc() - make HVC calls
* @a0-a7: arguments passed in registers 0 to 7
* @res: result values from registers 0 to 3
* @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required.
*
* This function is used to make HVC calls following SMC Calling
* Convention. The content of the supplied param are copied to registers 0
* to 7 prior to the HVC instruction. The return values are updated with
* the content from register 0 to 3 on return from the HVC instruction. An
* optional quirk structure provides vendor specific behavior.
*/
asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3, unsigned long a4,
unsigned long a5, unsigned long a6, unsigned long a7,
struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL)
As you can probably guess, this is implemented in assembly:
/arch/arm/kernel/smccc-call.S
/*
* Wrap c macros in asm macros to delay expansion until after the
* SMCCC asm macro is expanded.
*/
.macro SMCCC_SMC
__SMC(0)
.endm
.macro SMCCC instr
UNWIND( .fnstart)
mov r12, sp
push {r4-r7}
UNWIND( .save {r4-r7})
ldm r12, {r4-r7} @ r0-r7 8 args
\instr
pop {r4-r7}
ldr r12, [sp, #(4 * 4)] @ now r12 points at &res
stm r12, {r0-r3}
bx lr
UNWIND( .fnend)
.endm
/*
* void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2,
* unsigned long a3, unsigned long a4, unsigned long a5,
* unsigned long a6, unsigned long a7, struct arm_smccc_res *res,
* struct arm_smccc_quirk *quirk)
*/
ENTRY(__arm_smccc_smc)
SMCCC SMCCC_SMC
ENDPROC(__arm_smccc_smc)
__SMC is a macro that emits the right encoding for Thumb and for ARM, since the two instruction sets differ. To understand the code above you need the ARM calling convention (AAPCS); I wrote a brief introduction to it before, and the final diagram there is all you need: the first four arguments travel in r0-r3, so when the function is entered via bl, sp points at the fifth argument. With that in mind, the operations above are perfectly natural.
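As an illustration of that convention, here is a hedged example of a PSCI CPU_ON going through this helper; target_mpidr and entry_point_phys are placeholder values, while PSCI_0_2_FN_CPU_ON and struct arm_smccc_res come from the kernel headers quoted above:
struct arm_smccc_res res;

/*
 * r0-r3 carry the first four arguments; a4-a7 and &res end up on the stack,
 * which is exactly what "mov r12, sp; ldm r12, {r4-r7}" relies on, and what
 * "ldr r12, [sp, #(4 * 4)]" reads back afterwards to find &res.
 */
arm_smccc_smc(PSCI_0_2_FN_CPU_ON,       /* r0: function id                 */
              target_mpidr,             /* r1: hardware id of the target   */
              entry_point_phys,         /* r2: where it starts executing   */
              0,                        /* r3: context id                  */
              0, 0, 0, 0,               /* stack: a4-a7, unused here       */
              &res);                    /* stack: result written back here */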
I have not analysed this firmware layer before, but it is really quite similar to an operating system on x86: ARM did not use to put a shielding layer over resources such as memory and registers, but now it has one, and handing control back is just an eret instruction, which lines up nicely with the x86 approach. Here is its git repository; the handler below is taken from ARM Trusted Firmware.
/*
* Top-level Standard Service SMC handler. This handler will in turn dispatch
* calls to PSCI SMC handler
*/
static uintptr_t std_svc_smc_handler(uint32_t smc_fid,
u_register_t x1,
u_register_t x2,
u_register_t x3,
u_register_t x4,
void *cookie,
void *handle,
u_register_t flags)
{
/*
* Dispatch PSCI calls to PSCI SMC handler and return its return
* value
*/
if (is_psci_fid(smc_fid)) {
uint64_t ret;
ret = psci_smc_handler(smc_fid, x1, x2, x3, x4,
cookie, handle, flags);
#if ENABLE_RUNTIME_INSTRUMENTATION
PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
RT_INSTR_EXIT_PSCI,
PMF_NO_CACHE_MAINT);
#endif
SMC_RET1(handle, ret);
}
...
}
arm-trusted-firmware/lib/psci/psci_main.c
/*******************************************************************************
* PSCI top level handler for servicing SMCs.
******************************************************************************/
u_register_t psci_smc_handler(uint32_t smc_fid,
u_register_t x1,
u_register_t x2,
u_register_t x3,
u_register_t x4,
void *cookie,
void *handle,
u_register_t flags)
{
u_register_t ret;
if (is_caller_secure(flags))
return (u_register_t)SMC_UNK;
/* Check the fid against the capabilities */
if ((psci_caps & define_psci_cap(smc_fid)) == 0U)
return (u_register_t)SMC_UNK;
if (((smc_fid >> FUNCID_CC_SHIFT) & FUNCID_CC_MASK) == SMC_32) {
/* 32-bit PSCI function, clear top parameter bits */
uint32_t r1 = (uint32_t)x1;
uint32_t r2 = (uint32_t)x2;
uint32_t r3 = (uint32_t)x3;
switch (smc_fid) {
case PSCI_VERSION:
ret = (u_register_t)psci_version();
break;
case PSCI_CPU_OFF:
ret = (u_register_t)psci_cpu_off();
break;
case PSCI_CPU_SUSPEND_AARCH32:
ret = (u_register_t)psci_cpu_suspend(r1, r2, r3);
break;
case PSCI_CPU_ON_AARCH32:
ret = (u_register_t)psci_cpu_on(r1, r2, r3);
break;
...
...
}
}
In the same file we can also find:
/*******************************************************************************
* PSCI frontend api for servicing SMCs. Described in the PSCI spec.
******************************************************************************/
int psci_cpu_on(u_register_t target_cpu,
uintptr_t entrypoint,
u_register_t context_id)
{
int rc;
entry_point_info_t ep;
/* Determine if the cpu exists of not */
rc = psci_validate_mpidr(target_cpu);
if (rc != PSCI_E_SUCCESS)
return PSCI_E_INVALID_PARAMS;
/* Validate the entry point and get the entry_point_info */
rc = psci_validate_entry_point(&ep, entrypoint, context_id);
if (rc != PSCI_E_SUCCESS)
return rc;
/*
* To turn this cpu on, specify which power
* levels need to be turned on
*/
return psci_cpu_on_start(target_cpu, &ep);
}
/lib/psci/psci_on.c
/*******************************************************************************
* Generic handler which is called to physically power on a cpu identified by
* its mpidr. It performs the generic, architectural, platform setup and state
* management to power on the target cpu e.g. it will ensure that
* enough information is stashed for it to resume execution in the non-secure
* security state.
*
* The state of all the relevant power domains are changed after calling the
* platform handler as it can return error.
******************************************************************************/
int psci_cpu_on_start(u_register_t target_cpu,
const entry_point_info_t *ep)
{
int rc;
aff_info_state_t target_aff_state;
int target_idx = plat_core_pos_by_mpidr(target_cpu);
/* Protect against multiple CPUs trying to turn ON the same target CPU */
psci_spin_lock_cpu(target_idx);
/*
* Generic management: Ensure that the cpu is off to be
* turned on.
* Perform cache maintanence ahead of reading the target CPU state to
* ensure that the data is not stale.
* There is a theoretical edge case where the cache may contain stale
* data for the target CPU data - this can occur under the following
* conditions:
* - the target CPU is in another cluster from the current
* - the target CPU was the last CPU to shutdown on its cluster
* - the cluster was removed from coherency as part of the CPU shutdown
*
* In this case the cache maintenace that was performed as part of the
* target CPUs shutdown was not seen by the current CPU's cluster. And
* so the cache may contain stale data for the target CPU.
*/
flush_cpu_data_by_index((unsigned int)target_idx,
psci_svc_cpu_data.aff_info_state);
rc = cpu_on_validate_state(psci_get_aff_info_state_by_idx(target_idx));
if (rc != PSCI_E_SUCCESS)
goto exit;
/*
* Call the cpu on handler registered by the Secure Payload Dispatcher
* to let it do any bookeeping. If the handler encounters an error, it's
* expected to assert within
*/
if ((psci_spd_pm != NULL) && (psci_spd_pm->svc_on != NULL))
psci_spd_pm->svc_on(target_cpu);
/*
* Set the Affinity info state of the target cpu to ON_PENDING.
* Flush aff_info_state as it will be accessed with caches
* turned OFF.
*/
psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING);
flush_cpu_data_by_index((unsigned int)target_idx,
psci_svc_cpu_data.aff_info_state);
/*
* The cache line invalidation by the target CPU after setting the
* state to OFF (see psci_do_cpu_off()), could cause the update to
* aff_info_state to be invalidated. Retry the update if the target
* CPU aff_info_state is not ON_PENDING.
*/
target_aff_state = psci_get_aff_info_state_by_idx(target_idx);
if (target_aff_state != AFF_STATE_ON_PENDING) {
assert(target_aff_state == AFF_STATE_OFF);
psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_ON_PENDING);
flush_cpu_data_by_index((unsigned int)target_idx,
psci_svc_cpu_data.aff_info_state);
assert(psci_get_aff_info_state_by_idx(target_idx) ==
AFF_STATE_ON_PENDING);
}
/*
* Perform generic, architecture and platform specific handling.
*/
/*
* Plat. management: Give the platform the current state
* of the target cpu to allow it to perform the necessary
* steps to power on.
*/
rc = psci_plat_pm_ops->pwr_domain_on(target_cpu);
assert((rc == PSCI_E_SUCCESS) || (rc == PSCI_E_INTERN_FAIL));
if (rc == PSCI_E_SUCCESS)
/* Store the re-entry information for the non-secure world. */
cm_init_context_by_index((unsigned int)target_idx, ep);
else {
/* Restore the state on error. */
psci_set_aff_info_state_by_idx(target_idx, AFF_STATE_OFF);
flush_cpu_data_by_index((unsigned int)target_idx,
psci_svc_cpu_data.aff_info_state);
}
exit:
psci_spin_unlock_cpu(target_idx);
return rc;
}
So on current ARMv8 systems, waking a CPU is implemented by ATF; underneath it should ultimately come down to power management. I will fill this part in when I get the time.
Summary
The key point is that the address at which a later-woken CPU starts executing is well defined, and that matters a great deal: following the route the boot CPU laid out for it, its thread_info->cpu was fixed before it was ever woken, so from the very first instant it can call smp_processor_id and use the result as its index into the per-CPU variables. This is quite interesting, yet surprisingly few people seem to mention it.