Linux内核中cpu_ops的实现因架构而异,对于ARM64架构一般通过执行smc指令进入EL3异常,由ATF执行PSCI功能,然后将结果返回给Linux。
这中间涉及到【Linux kernel的cpu_ops、psci_ops】、【SMC/HVC】、【PSCI】、【ATF的PSCI】相关等等。
1. PSCI规格
目前PSCI最新规格为v1.1,这里以v1.0为参考:《POWER STATE COORDINATION INTERFACE (PSCI) System Software on ARM® Systems》。
1.1 PSCI_VERSION
返回当前psci固件版本号。
1.2 CPU_SUSPEND
执行核的suspend操作,一般用于等待被唤醒后继续执行的子系统。
1.3 CPU_OFF
用于hotplug中关闭调用此功能的核。被CPU_OFF关闭的核仅能被CPU_ON重新打开。
1.4 CPU_ON
上电一个核,用于以下两种情况:
- 还未被启动的核
- 已经被CPU_OFF关闭的核。
1.5 AFFINITY_INFO
1.6 MIGRATE
可选。要求单核TOS将其执行上下文迁移到一个指定核。
1.7 MIGRATE_INFO_TYPE
允许调用者查询当前TOS对多核支持情况。
1.8 MIGRATE_INFO_UP_CPU
可选。对单核TOS系统,此功能返回TOS当前驻存在哪个核上。
1.9 SYSTEM_OFF
1.10 SYSTEM_RESET
进行系统复位,没有入参也没有返回值。
1.11 PSCI_FEATURES
查询psci固件是否支持指定功能id及其特性。
1.12 CPU_FREEZE
1.13 CPU_DEFAULT_SUSPEND
1.14 NODE_HW_STATE
1.15 SYSTEM_SUSPEND
让系统进入深度低功耗模式。
1.16 SUSPEND_MODE
1.17 PSCI_STAT_RESIDENCY
1.18 PSCI_STAT_COUNT
2. ATF PSCI实现
参考:《《ARM Trusted Firmware》阅读笔记 PSCI》
3. Linux PSCI实现
PSCI主要负责CPU低功耗、热插拔功能,对接cpu_ops实现一系列函数。
在dt中配置psci属性,并在内核启动时由psci驱动解析该配置完成初始化,下面分别说明。
3.1 psci dts配置
psci相关配置在dts中定义:
/* PSCI firmware node; the compatible string is matched against the kernel's psci_of_match table. */
psci {
	compatible = "arm,psci-0.2";
	/* Conduit used to reach the PSCI firmware; the kernel parser accepts "smc" or "hvc". */
	method = "smc";
};
说明使用的驱动是psci v0.2标准的接口。
3.2 psci驱动初始化
对psci初始化在setup_arch()中调用,psci_dt_init()从dt中解析出psci版本以及实现psci调用的方式(smc)。
void __init setup_arch(char **cmdline_p) { ... if (acpi_disabled) psci_dt_init(); else psci_acpi_init(); ... } int __init psci_dt_init(void) { struct device_node *np; const struct of_device_id *matched_np; psci_initcall_t init_fn; np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);----进行dts设备匹配,这里对应psci-0.2。 if (!np) return -ENODEV; init_fn = (psci_initcall_t)matched_np->data;-------------------------------对应的函数为psci_0_2_init()。 return init_fn(np); } static const struct of_device_id psci_of_match[] __initconst = { { .compatible = "arm,psci", .data = psci_0_1_init}, { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, { .compatible = "arm,psci-1.0", .data = psci_0_2_init}, {}, }; static int __init psci_0_2_init(struct device_node *np) { int err; err = get_set_conduit_method(np);--------------------------------------从dt中解析出psci的method,这里为smc,表示psci功能通过smc(Secure Monitor Call:->EL3调用)实现。其他方式还有svc(Supervisor call:->EL1调用)和hvc(Hypervisor call:->EL2调用)。 if (err) goto out_put_node; /* * Starting with v0.2, the PSCI specification introduced a call * (PSCI_VERSION) that allows probing the firmware version, so * that PSCI function IDs and version specific initialization * can be carried out according to the specific version reported * by firmware */ err = psci_probe(); out_put_node: of_node_put(np); return err; } static int __init psci_probe(void) { u32 ver = psci_get_version();--------------------------------------------------通过SMC的PSCI_0_2_FN_PSCI_VERSION功能id获取PSCI固件版本号。 pr_info("PSCIv%d.%d detected in firmware. ", PSCI_VERSION_MAJOR(ver), PSCI_VERSION_MINOR(ver)); if (PSCI_VERSION_MAJOR(ver) == 0 && PSCI_VERSION_MINOR(ver) < 2) {-------------驱动只支持psci 0.2及以上的psci固件。 pr_err("Conflicting PSCI version detected. 
"); return -EINVAL; } psci_0_2_set_functions();------------------------------------------------------将linux中使用的psci_ops、arm_pm_off、pm_power_off对齐到具体PSCI的SMC功能id。 psci_init_migrate(); if (PSCI_VERSION_MAJOR(ver) >= 1) {--------------------------------------------对于>=v1.0版本psci,特殊处理suspend。 psci_init_smccc(); psci_init_cpu_suspend(); psci_init_system_suspend(); } return 0; }
3.2.1 PSCI功能实现中转通道:SMC或HVC
kernel发起PSCI调用有两种通道(conduit):SMC和HVC。get_set_conduit_method()的核心是根据dt中的method字段,选择合适的invoke_psci_fn函数。
enum psci_conduit { PSCI_CONDUIT_NONE, PSCI_CONDUIT_SMC, PSCI_CONDUIT_HVC, }; static int get_set_conduit_method(struct device_node *np) { const char *method; pr_info("probing for conduit method from DT. "); if (of_property_read_string(np, "method", &method)) { pr_warn("missing "method" property "); return -ENXIO; } if (!strcmp("hvc", method)) { set_conduit(PSCI_CONDUIT_HVC); } else if (!strcmp("smc", method)) {------------------------------根据dt中的method字段,设置invoke_psci_fn函数。 set_conduit(PSCI_CONDUIT_SMC); } else { pr_warn("invalid "method" property: %s ", method); return -EINVAL; } return 0; } static void set_conduit(enum psci_conduit conduit)-----------------------HVC和SMC两种访问psci固件的方式,HVC表示当前OS为guest os;SMC表示从EL1直接访问EL3 psci固件。 { switch (conduit) { case PSCI_CONDUIT_HVC: invoke_psci_fn = __invoke_psci_fn_hvc; break; case PSCI_CONDUIT_SMC: invoke_psci_fn = __invoke_psci_fn_smc; break; default: WARN(1, "Unexpected PSCI conduit %d ", conduit); } psci_ops.conduit = conduit; } static unsigned long __invoke_psci_fn_hvc(unsigned long function_id, unsigned long arg0, unsigned long arg1, unsigned long arg2) { struct arm_smccc_res res; arm_smccc_hvc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res); return res.a0; } static unsigned long __invoke_psci_fn_smc(unsigned long function_id, unsigned long arg0, unsigned long arg1, unsigned long arg2) { struct arm_smccc_res res; arm_smccc_smc(function_id, arg0, arg1, arg2, 0, 0, 0, 0, &res); return res.a0; } .macro SMCCC instr .cfi_startproc instr #0 ldr x4, [sp] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] ldr x4, [sp, #8] cbz x4, 1f /* no quirk structure */ ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 b.ne 1f str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] 1: ret .cfi_endproc .endm /* * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, 
* struct arm_smccc_quirk *quirk) */ ENTRY(__arm_smccc_smc) SMCCC smc ENDPROC(__arm_smccc_smc) /* * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, * struct arm_smccc_quirk *quirk) */ ENTRY(__arm_smccc_hvc) SMCCC hvc ENDPROC(__arm_smccc_hvc)
3.2.2 psci_ops函数集
struct psci_operations psci_ops是Linux下对应psci功能的函数集;另外psci_function_id[]的下标为Linux psci功能id,值为具体psci规格功能id,通过psci_function_id[]完成两者的转换。
/*
 * Kernel-side table of PSCI operations. The callbacks are filled in during
 * probing; conduit and smccc_version describe how the firmware is reached.
 */
struct psci_operations {
	/* Return the PSCI firmware version. */
	u32 (*get_version)(void);
	/* Enter low-power state @state; execution resumes at @entry_point. */
	int (*cpu_suspend)(u32 state, unsigned long entry_point);
	int (*cpu_off)(u32 state);
	int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
	int (*migrate)(unsigned long cpuid);
	int (*affinity_info)(unsigned long target_affinity,
			unsigned long lowest_affinity_level);
	int (*migrate_info_type)(void);
	enum psci_conduit conduit;
	enum smccc_version smccc_version;
};

/* Global instance; starts with no conduit selected and SMCCC v1.0. */
struct psci_operations psci_ops = {
	.conduit = PSCI_CONDUIT_NONE,
	.smccc_version = SMCCC_VERSION_1_0,
};

/* Linux-internal PSCI function indices; PSCI_FN_MAX is the table size. */
enum psci_function {
	PSCI_FN_CPU_SUSPEND,
	PSCI_FN_CPU_ON,
	PSCI_FN_CPU_OFF,
	PSCI_FN_MIGRATE,
	PSCI_FN_MAX,
};

/* Maps enum psci_function to the firmware's actual PSCI function id. */
static u32 psci_function_id[PSCI_FN_MAX];
psci_0_2_set_functions()主要设置了psci_ops函数集,以及arm_pm_restart和pm_power_off。
/*
 * Populate psci_ops and the psci_function_id[] translation table with the
 * standard PSCI v0.2 function ids, and route system restart / power-off
 * through PSCI.
 */
static void __init psci_0_2_set_functions(void)
{
	pr_info("Using standard PSCI v0.2 function IDs\n");
	psci_ops.get_version = psci_get_version;

	psci_function_id[PSCI_FN_CPU_SUSPEND] =
					PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
	psci_ops.cpu_suspend = psci_cpu_suspend;

	psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
	psci_ops.cpu_off = psci_cpu_off;

	psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);
	psci_ops.cpu_on = psci_cpu_on;

	psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);
	psci_ops.migrate = psci_migrate;

	/* These two call the firmware with a fixed id, so no table entry. */
	psci_ops.affinity_info = psci_affinity_info;
	psci_ops.migrate_info_type = psci_migrate_info_type;

	/* System-wide hooks: reset and power-off. */
	arm_pm_restart = psci_sys_reset;
	pm_power_off = psci_sys_poweroff;
}
对应psci的PSCI_VERSION功能,返回psci固件版本号。
通过PSCI_VERSION_MAJOR()和PSCI_VERSION_MINOR()解析。
对应psci的CPU_SUSPEND功能,state是将要进入的低功耗状态,entry_point是从低功耗状态返回后执行入口地址。
entry_point必须是物理地址或者虚拟机的IPA。
第三个参数是Powerdown功耗状态才会使用。
/*
 * Issue the PSCI CPU_SUSPEND call.
 *
 * @state:       low-power state to enter
 * @entry_point: address at which execution resumes after wake-up
 *
 * Returns the firmware status translated by psci_to_linux_errno().
 */
static int psci_cpu_suspend(u32 state, unsigned long entry_point)
{
	const u32 func_id = psci_function_id[PSCI_FN_CPU_SUSPEND];
	const int status = invoke_psci_fn(func_id, state, entry_point, 0);

	return psci_to_linux_errno(status);
}
对应psci的CPU_OFF功能,关闭调用此功能的核。
/*
 * Issue the PSCI CPU_OFF call with @state as its only argument.
 *
 * Returns the firmware status translated by psci_to_linux_errno().
 */
static int psci_cpu_off(u32 state)
{
	const u32 func_id = psci_function_id[PSCI_FN_CPU_OFF];
	const int status = invoke_psci_fn(func_id, state, 0, 0);

	return psci_to_linux_errno(status);
}
对应psci的CPU_ON功能,给一个核上电。
cpuid为需要上电cpu的id;entry_point是CPU上电后运行入口物理地址或IPA,比如这里为secondary_entry()。如果第一次启动,可以传入context_id参数。
static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) { int err; u32 fn; fn = psci_function_id[PSCI_FN_CPU_ON]; err = invoke_psci_fn(fn, cpuid, entry_point, 0); return psci_to_linux_errno(err); } static int cpu_psci_cpu_boot(unsigned int cpu) { int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); if (err) pr_err("failed to boot CPU%d (%d) ", cpu, err); return err; } /* * Secondary entry point that jumps straight into the kernel. Only to * be used where CPUs are brought online dynamically by the kernel. */ ENTRY(secondary_entry) bl el2_setup // Drop to EL1 bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry)
对应psci的MIGRATE功能,将TOS迁移到指定cpuid上执行。
cpuid将要迁移到cpu的id。
/*
 * Issue the PSCI MIGRATE call, asking for the Trusted OS to be moved to
 * the CPU identified by @cpuid.
 *
 * Returns the firmware status translated by psci_to_linux_errno().
 */
static int psci_migrate(unsigned long cpuid)
{
	const u32 func_id = psci_function_id[PSCI_FN_MIGRATE];
	const int status = invoke_psci_fn(func_id, cpuid, 0, 0);

	return psci_to_linux_errno(status);
}
对应psci的AFFINITY_INFO功能,
/*
 * Issue the PSCI AFFINITY_INFO call for @target_affinity at
 * @lowest_affinity_level.
 *
 * Returns the raw firmware result; it is not translated to a Linux errno.
 */
static int psci_affinity_info(unsigned long target_affinity,
		unsigned long lowest_affinity_level)
{
	int result;

	result = invoke_psci_fn(PSCI_FN_NATIVE(0_2, AFFINITY_INFO),
				target_affinity, lowest_affinity_level, 0);
	return result;
}
对应psci的MIGRATE_INFO_TYPE功能,获取TOS在多核环境下迁移能力。
0 - TOS仅运行在一个核上,但是可以迁移到任何未被CPU_OFF关闭的核。
1 - TOS仅运行在一个核上,不支持MIGRATE功能。
2 - TOS不存在或者不需要MIGRATE功能。
NOT_SUPPORTED - 不需要MIGRATE。
/*
 * Issue the PSCI MIGRATE_INFO_TYPE call (no arguments).
 *
 * Returns the raw firmware result: one of the PSCI_0_2_TOS_* values below
 * or a PSCI error code.
 */
static int psci_migrate_info_type(void)
{
	int tos_type;

	tos_type = invoke_psci_fn(PSCI_0_2_FN_MIGRATE_INFO_TYPE, 0, 0, 0);
	return tos_type;
}

/* Trusted OS multicore support levels reported by MIGRATE_INFO_TYPE (PSCI v0.2) */
#define PSCI_0_2_TOS_UP_MIGRATE			0
#define PSCI_0_2_TOS_UP_NO_MIGRATE		1
#define PSCI_0_2_TOS_MP				2
对应psci的SYSTEM_RESET功能,执行系统复位功能。
static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) { invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); } /* * Restart requires that the secondary CPUs stop performing any activity * while the primary CPU resets the system. Systems with multiple CPUs must * provide a HW restart implementation, to ensure that all CPUs reset at once. * This is required so that any code running after reset on the primary CPU * doesn't have to co-ordinate with other CPUs to ensure they aren't still * executing pre-reset code, and using RAM that the primary CPU's code wishes * to use. Implementing such co-ordination would be essentially impossible. */ void machine_restart(char *cmd) { ... /* Now call the architecture specific reboot code. */ if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd);------------------调用psci_sys_reset()函数。 else do_kernel_restart(cmd); /* * Whoops - the architecture was unable to reboot. */ printk("Reboot failed -- System halted "); while (1); }
对应psci的SYSTEM_OFF功能, 关闭系统。无入参和返回值。
/* Power-off hook: issue the PSCI SYSTEM_OFF call (no arguments). */
static void psci_sys_poweroff(void)
{
	invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
}

/*
 * Power-off simply requires that the secondary CPUs stop performing any
 * activity (executing tasks, handling interrupts). smp_send_stop()
 * achieves this. When the system power is turned off, it will take all CPUs
 * with it.
 */
void machine_power_off(void)
{
	local_irq_disable();
	smp_send_stop();
	if (pm_power_off)
		pm_power_off();		/* psci_sys_poweroff() when PSCI is in use */
}
3.3 TOS驻存CPU不允许hotplug
psci_init_migrate()获取当前TOS驻存的CPU id,并赋值给resident_cpu。
/*
 * Detect the presence of a resident Trusted OS which may cause CPU_OFF to
 * return DENIED (which would be fatal).
 *
 * When the Trusted OS is uniprocessor, record the logical CPU it lives on
 * in resident_cpu. Returns early (leaving resident_cpu untouched) when no
 * migration handling is needed, the call is unsupported, the reported
 * type is unknown, or the reported physical id is invalid.
 */
static void __init psci_init_migrate(void)
{
	unsigned long cpuid;
	int type, cpu = -1;

	/* Ask the firmware which Trusted OS migration type it supports. */
	type = psci_ops.migrate_info_type();

	if (type == PSCI_0_2_TOS_MP) {
		pr_info("Trusted OS migration not required\n");
		return;
	}

	if (type == PSCI_RET_NOT_SUPPORTED) {
		pr_info("MIGRATE_INFO_TYPE not supported.\n");
		return;
	}

	if (type != PSCI_0_2_TOS_UP_MIGRATE &&
	    type != PSCI_0_2_TOS_UP_NO_MIGRATE) {
		pr_err("MIGRATE_INFO_TYPE returned unknown type (%d)\n", type);
		return;
	}

	/* MIGRATE_INFO_UP_CPU: MPIDR value of the CPU hosting the Trusted OS. */
	cpuid = psci_migrate_info_up_cpu();
	if (cpuid & ~MPIDR_HWID_BITMASK) {
		pr_warn("MIGRATE_INFO_UP_CPU reported invalid physical ID (0x%lx)\n",
			cpuid);
		return;
	}

	/* Translate the MPIDR value into a logical CPU id. */
	cpu = get_logical_index(cpuid);
	resident_cpu = cpu >= 0 ? cpu : -1;

	pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid);
}
在对CPU进行hotplug(下线)之前,会调用cpu_disable检查该CPU是否允许被关闭。如果待下线的cpu是resident_cpu,则返回-EPERM错误。
bool psci_tos_resident_on(int cpu) { return cpu == resident_cpu; } static int cpu_psci_cpu_disable(unsigned int cpu) { /* Fail early if we don't have CPU_OFF support */ if (!psci_ops.cpu_off) return -EOPNOTSUPP; /* Trusted OS will deny CPU_OFF */ if (psci_tos_resident_on(cpu)) return -EPERM; return 0; } const struct cpu_operations cpu_psci_ops = { .name = "psci", ... #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif };
3.4 v1.0及以上suspend处理
static void __init psci_init_cpu_suspend(void) { int feature = psci_features(psci_function_id[PSCI_FN_CPU_SUSPEND]); if (feature != PSCI_RET_NOT_SUPPORTED) psci_cpu_suspend_feature = feature; } static void __init psci_init_system_suspend(void) { int ret; if (!IS_ENABLED(CONFIG_SUSPEND)) return; ret = psci_features(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND)); if (ret != PSCI_RET_NOT_SUPPORTED) suspend_set_ops(&psci_suspend_ops); } static const struct platform_suspend_ops psci_suspend_ops = { .valid = suspend_valid_only_mem, .enter = psci_system_suspend_enter, }; static int psci_system_suspend_enter(suspend_state_t state) { return cpu_suspend(0, psci_system_suspend); }
对应psci的SYSTEM_SUSPEND功能,实现suspend到ram功能,类似于进入最深度低功耗的CPU_SUSPEND。
成功则没有返回值,失败则返回NOT_SUPPORTED、INVALID_ADDRESS、ALREADY_ON之一。
/*
 * Issue the PSCI SYSTEM_SUSPEND call, passing the physical address of
 * cpu_resume as the resume entry point. @unused is ignored.
 *
 * Returns the raw firmware result.
 */
static int psci_system_suspend(unsigned long unused)
{
	int result;

	result = invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND),
				virt_to_phys(cpu_resume), 0, 0);
	return result;
}
4. cpu_ops到psci固件通路
大致调用路径:cpu_ops->cpu_psci_ops->psci_ops->invoke_psci_fn()->SMCCC。
dt中低功耗配置:
cpus {
	#address-cells = <0x2>;
	#size-cells = <0x0>;

	cpu@0 {
		compatible = "arm,cortex-a53";
		device_type = "cpu";
		reg = <0x0 0x0>;
		/* Compared by name with the kernel's cpu_operations tables; "psci" selects cpu_psci_ops. */
		enable-method = "psci";
		clock-latency = <0x186a0>;
		/* NOTE(review): presumably phandles of idle-state nodes defined elsewhere in the dts - confirm. */
		cpu-idle-states = <0xc 0xd>;
	};

	cpu@1 {
		...
	};

	...
};
setup_arch()中调用cpu_read_bootcpu_ops(),经过一系列判断cpu_ops[0]指向cpu_psci_ops。cpu_psci_ops中大部分实现通过调用psci_ops,在函数psci_0_2_set_functions()中指定了psci_ops函数集,基本通过invoke_psci_fn()发送SMC调用由psci固件在EL3执行。
void __init setup_arch(char **cmdline_p) { ... if (acpi_disabled) psci_dt_init(); else psci_acpi_init(); cpu_read_bootcpu_ops(); ... } static inline void __init cpu_read_bootcpu_ops(void) { cpu_read_ops(0); } int __init cpu_read_ops(int cpu) { const char *enable_method = cpu_read_enable_method(cpu);------------------------读取当前cpu在dt中的enable-method配置,这里以psci为例。 if (!enable_method) return -ENODEV; cpu_ops[cpu] = cpu_get_ops(enable_method); if (!cpu_ops[cpu]) { pr_warn("Unsupported enable-method: %s ", enable_method); return -EOPNOTSUPP; } return 0; } static const struct cpu_operations * __init cpu_get_ops(const char *name) { const struct cpu_operations **ops; ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops; while (*ops) { if (!strcmp(name, (*ops)->name))-----------------------------------------------在关闭acpi情况下,根据从dt中读取的字符串匹配到cpu_psci_ops函数集。 return *ops; ops++; } return NULL; } static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE .cpu_init_idle = psci_cpu_init_idle, .cpu_suspend = psci_cpu_suspend_enter, #endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, #ifdef CONFIG_HOTPLUG_CPU .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif };