From 3b4569e14d30f73cd787e0c4ac0cd019368a0d14 Mon Sep 17 00:00:00 2001 From: Zhang Junyang Date: Mon, 25 Aug 2025 19:58:32 +0800 Subject: [PATCH] Add RISC-V SMP boot entrypoint --- OSDK.toml | 3 +- osdk/src/base_crate/riscv64.ld.template | 5 + ostd/src/arch/riscv/boot/ap_boot.S | 122 +++++++++++++++ ostd/src/arch/riscv/boot/boot.S | 27 +++- ostd/src/arch/riscv/boot/mod.rs | 8 +- ostd/src/arch/riscv/boot/smp.rs | 198 +++++++++++++++++++++++- ostd/src/arch/riscv/irq/ipi.rs | 6 +- ostd/src/arch/riscv/mod.rs | 2 +- ostd/src/arch/riscv/trap/trap.S | 4 +- ostd/src/boot/smp.rs | 4 +- ostd/src/lib.rs | 7 +- 11 files changed, 358 insertions(+), 28 deletions(-) create mode 100644 ostd/src/arch/riscv/boot/ap_boot.S diff --git a/OSDK.toml b/OSDK.toml index f867a4952..bd6c255fa 100644 --- a/OSDK.toml +++ b/OSDK.toml @@ -54,7 +54,8 @@ build.strip_elf = false qemu.args = """\ -cpu rv64,svpbmt=true \ -machine virt \ - -m 8G \ + -m ${MEM:-8G} \ + -smp ${SMP:-1} \ --no-reboot \ -nographic \ -display none \ diff --git a/osdk/src/base_crate/riscv64.ld.template b/osdk/src/base_crate/riscv64.ld.template index 19b69eba4..974ff75c1 100644 --- a/osdk/src/base_crate/riscv64.ld.template +++ b/osdk/src/base_crate/riscv64.ld.template @@ -21,6 +21,11 @@ SECTIONS KEEP(*(.boot.stack)) . = ALIGN(4096); } + + .ap_boot : AT(LOADADDR(.boot) + SIZEOF(.boot)) { + KEEP(*(.ap_boot)) + . = ALIGN(4096); + } # Then, we switch to virtual addresses for all the other sections. . += KERNEL_VMA_OFFSET; diff --git a/ostd/src/arch/riscv/boot/ap_boot.S b/ostd/src/arch/riscv/boot/ap_boot.S new file mode 100644 index 000000000..e0ada269b --- /dev/null +++ b/ostd/src/arch/riscv/boot/ap_boot.S @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: MPL-2.0 */ + +// The boot routine executed by application processors (APs) on RISC-V. + +SATP_MODE_SV39 = 8 << 60 +SATP_MODE_SV48 = 9 << 60 +SATP_PPN_SHIFT = 0 +PAGE_SHIFT = 12 +KERNEL_VMA_OFFSET = 0xffffffff00000000 + +# This is to workaround . 
+.attribute arch, "rv64imac" + +.section ".ap_boot", "awx", @progbits + +.balign 4096 +.global ap_boot_start +ap_boot_start: + # At this point: + # - a0 contains the hart ID; + # - a1 is the opaque parameter (not used here); + # - We're running in M-mode or S-mode depending on SBI implementation. + + # Save hart ID in t4 for later use (t4 is caller-saved but we'll be careful). + mv t4, a0 + + # Load the page table address in Sv48 mode and set SATP. + lla t0, __ap_boot_page_table_pointer - KERNEL_VMA_OFFSET + ld t1, 0(t0) # Load page table physical address. + li t2, SATP_MODE_SV48 # Sv48 mode. + srli t1, t1, PAGE_SHIFT - SATP_PPN_SHIFT + or t1, t1, t2 + csrw satp, t1 + + # Check if the write to satp succeeds. + # Reference: . + csrr t3, satp + beq t3, t1, flush_tlb_ap + + # This AP doesn't support Sv48. So the `__ap_boot_page_table_pointer` must + # point to a Sv39 page table since we assume that all harts support a same + # paging mode. + xor t1, t1, t2 # Clear previous mode bits. + li t2, SATP_MODE_SV39 # Sv39 mode. + or t1, t1, t2 + csrw satp, t1 + + # Check again if the write to satp succeeds. + csrr t0, satp + beq t0, t1, flush_tlb_ap + + # If the CPU doesn't support either Sv48 or Sv39 this is actually reachable. +unreachable_pa_ap: + j unreachable_pa_ap + +flush_tlb_ap: + sfence.vma # Flush TLB. + + # Now we need to switch to virtual addressing. + # Calculate virtual address using the same method as boot.S. + li t1, KERNEL_VMA_OFFSET + lla sp, ap_boot_virtual - KERNEL_VMA_OFFSET + or sp, sp, t1 + jr sp + +# From here, we're in the canonical virtual address space instead of linear +# physical addresses. + +.data + +.balign 8 +# These will be filled by the BSP before starting APs. +.global __ap_boot_page_table_pointer +__ap_boot_page_table_pointer: + .quad 0 +.global __ap_boot_info_array_pointer +__ap_boot_info_array_pointer: + .quad 0 + +# This is atomically incremented when a new hart is kicked on. The BSP's ID is +# 0, and the first AP's ID will be 1. 
+__ap_boot_cpu_id_tail: + .quad 1 + +.text + +ap_boot_virtual: + # Atomically update the CPU ID tail and load the previous value to t1. + lla t0, __ap_boot_cpu_id_tail +cmpxchg_load: + lr.d t1, (t0) + addi t2, t1, 1 + sc.d t2, t2, (t0) + bnez t2, cmpxchg_load + + # Get the AP boot info array pointer using absolute addressing. + lla t2, __ap_boot_info_array_pointer + ld t2, 0(t2) # Load pointer to `PerApRawInfo` array. + + # Each PerApRawInfo is 16 bytes (2 * 8-byte pointers). + # Calculate offset: (cpu_id - 1) * 16. + addi t3, t1, -1 # `cpu_id - 1` (BSP is cpu 0, first AP is cpu 1). + slli t3, t3, 4 # Multiply by 16. + add t3, t3, t2 # `t3 = &per_ap_raw_info[cpu_id - 1]` + + # Load stack top and CPU local storage's base address. + ld sp, 0(t3) # Load stack_top. + ld gp, 8(t3) # Load cpu_local pointer. + + # Clear frame pointer for clean stack traces. + li fp, 0 + + # Jump to Rust AP entry point. + # Pass CPU ID as the first argument. + mv a0, t1 + # Pass the hardware hart ID as the second argument. + mv a1, t4 + lla t1, riscv_ap_early_entry + jr t1 + +unreachable_va_ap: + j unreachable_va_ap diff --git a/ostd/src/arch/riscv/boot/boot.S b/ostd/src/arch/riscv/boot/boot.S index 5f78b0f1f..de9c461e6 100644 --- a/ostd/src/arch/riscv/boot/boot.S +++ b/ostd/src/arch/riscv/boot/boot.S @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: MPL-2.0 */ +// The boot routine executed by the bootstrap processor (BSP) on RISC-V. + SATP_MODE_SV39 = 8 << 60 SATP_MODE_SV48 = 9 << 60 SATP_PPN_SHIFT = 0 @@ -21,7 +23,8 @@ KERNEL_VMA_OFFSET = 0xffffffff00000000 _start: # Arguments passed from SBI: # a0 = hart id - # a1 = device tree paddr (not touched) + # a1 = device tree paddr + # We do not touch them here. They are passed to the Rust entrypoint. # Set up the Sv48 page table. # sv48_boot_l4pt[511] = (PPN(sv48_boot_l3pt) << PTE_PPN_SHIFT) | PTE_V @@ -43,16 +46,24 @@ _start: # Check if the write to satp succeeds. If not, try Sv39. # Reference: . 
csrr t1, satp - beq t0, t1, flush_tlb + beq t0, t1, flush_tlb_bsp # Try loading the Sv39 page table. - la t0, sv39_boot_l3pt + lla t0, sv39_boot_l3pt li t1, SATP_MODE_SV39 srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT or t0, t0, t1 csrw satp, t0 -flush_tlb: + # Check again if the write to satp succeeds. + csrr t1, satp + beq t0, t1, flush_tlb_bsp + + # If the CPU doesn't support either Sv48 or Sv39 this is actually reachable. +unreachable_pa_bsp: + j unreachable_pa_bsp + +flush_tlb_bsp: sfence.vma # Update SP/PC to use the virtual address. @@ -105,14 +116,18 @@ boot_stack_bottom: .skip 0x40000 # 256 KiB boot_stack_top: -# From here, we're in the .text section: we no longer use physical address. +# From here, we no longer use physical address. + .text _start_virt: - # Initialize GP to the CPU-local start address. + # Initialize GP to the CPU-local storage's base address. .extern __cpu_local_start lla gp, __cpu_local_start # Jump into Rust code. lla t0, riscv_boot jr t0 + +unreachable_va_bsp: + j unreachable_va_bsp diff --git a/ostd/src/arch/riscv/boot/mod.rs b/ostd/src/arch/riscv/boot/mod.rs index 888a31a68..572b84103 100644 --- a/ostd/src/arch/riscv/boot/mod.rs +++ b/ostd/src/arch/riscv/boot/mod.rs @@ -104,11 +104,17 @@ fn parse_initramfs_range() -> Option<(usize, usize)> { Some((initrd_start, initrd_end)) } +static mut BOOTSTRAP_HART_ID: u32 = u32::MAX; + /// The entry point of the Rust code portion of Asterinas. #[no_mangle] -pub extern "C" fn riscv_boot(_hart_id: usize, device_tree_paddr: usize) -> ! { +pub extern "C" fn riscv_boot(hart_id: usize, device_tree_paddr: usize) -> ! { early_println!("Enter riscv_boot"); + // SAFETY: We only write it once this time. Other processors will only read + // it. And other processors are not booted yet so there's no races. 
+ unsafe { BOOTSTRAP_HART_ID = hart_id as u32 }; + let device_tree_ptr = paddr_to_vaddr(device_tree_paddr) as *const u8; let fdt = unsafe { fdt::Fdt::from_ptr(device_tree_ptr).unwrap() }; DEVICE_TREE.call_once(|| fdt); diff --git a/ostd/src/arch/riscv/boot/smp.rs b/ostd/src/arch/riscv/boot/smp.rs index afc8eabef..9f913325f 100644 --- a/ostd/src/arch/riscv/boot/smp.rs +++ b/ostd/src/arch/riscv/boot/smp.rs @@ -2,16 +2,198 @@ //! Multiprocessor Boot Support -use crate::{boot::smp::PerApRawInfo, mm::Paddr}; +use core::arch::global_asm; + +use crate::{ + boot::smp::PerApRawInfo, + cpu_local_cell, + mm::{Paddr, Vaddr}, +}; + +// Include the AP boot assembly code +global_asm!(include_str!("ap_boot.S")); pub(crate) fn count_processors() -> Option<u32> { - Some(1) + let mut hart_count = 0; + + for_each_hart_id(|_| hart_count += 1); + + if hart_count == 0 { + None + } else { + Some(hart_count) + } } -pub(crate) unsafe fn bringup_all_aps( - _info_ptr: *const PerApRawInfo, - _pr_ptr: Paddr, - _num_cpus: u32, -) { - unimplemented!() +/// Brings up all application processors. +/// +/// Following the x86 naming, all the harts that are not the bootstrapping hart +/// are "application processors". +/// +/// # Safety +/// +/// The caller must ensure that +/// 1. we're in the boot context of the BSP, +/// 2. all APs have not yet been booted, and +/// 3. the arguments are valid to boot APs. +pub(crate) unsafe fn bringup_all_aps(info_ptr: *const PerApRawInfo, pt_ptr: Paddr, num_cpus: u32) { + if num_cpus <= 1 { + return; // No APs to bring up + } + + // SAFETY: We ensure that the variables are properly defined in the assembly + // code and they are safe to write here. 
+ unsafe { + fill_boot_info_ptr(info_ptr); + fill_boot_page_table_ptr(pt_ptr); + } + + let bsp_id = get_bootstrap_hart_id(); + + log::info!("Bootstrapping hart is {}, booting all other harts", bsp_id); + + for_each_hart_id(|hart_id| { + if hart_id != bsp_id { + // SAFETY: Each hart ID is iterated over exactly once here so we + // won't boot up one hart twice. Other safety constraints are + // upheld by the caller. + unsafe { bringup_ap(hart_id) }; + } + }); +} + +fn for_each_hart_id(mut f: impl FnMut(u32)) { + let Some(device_tree) = super::DEVICE_TREE.get() else { + f(get_bootstrap_hart_id()); + return; + }; + + device_tree.cpus().for_each(|cpu_node| { + if let Some(device_type) = cpu_node.property("device_type") { + if device_type.as_str() == Some("cpu") { + if let Some(reg) = cpu_node.property("reg") { + f(reg.as_usize().unwrap() as u32); + } + } + } + }) +} + +/// # Safety +/// +/// The caller must ensure that +/// 1. we're in the boot context of the BSP, +/// 2. the resources for APs' boot (e.g., stacks and the page table) are set +/// up correctly, and +/// 3. the `hart_id` hart hasn't booted. +unsafe fn bringup_ap(hart_id: u32) { + log::info!("Starting hart {}", hart_id); + + // Use SBI to start the hart directly at the AP boot code + let result = sbi_rt::hart_start( + hart_id as usize, + get_ap_boot_start_addr(), + /* Unused */ 0, + ); + + if result.error == 0 { + log::debug!("Successfully started hart {}", hart_id); + } else { + log::error!( + "Failed to start hart {}: error code {}", + hart_id, + result.error + ); + } +} + +/// Fills the AP boot info array pointer. +/// +/// # Safety +/// +/// The caller must ensure that `__ap_boot_info_array_pointer` is safe to write. +unsafe fn fill_boot_info_ptr(info_ptr: *const PerApRawInfo) { + extern "C" { + static mut __ap_boot_info_array_pointer: *const PerApRawInfo; + } + + // SAFETY: The safety conditions are upheld by the caller. 
+ unsafe { + __ap_boot_info_array_pointer = info_ptr; + } +} + +/// Fills the AP boot page table pointer. +/// +/// # Safety +/// +/// The caller must ensure that `__ap_boot_page_table_pointer` is safe to write. +unsafe fn fill_boot_page_table_ptr(pt_ptr: Paddr) { + extern "C" { + static mut __ap_boot_page_table_pointer: Paddr; + } + + // SAFETY: The safety conditions are upheld by the caller. + unsafe { + __ap_boot_page_table_pointer = pt_ptr; + } +} + +fn get_ap_boot_start_addr() -> Paddr { + const KERNEL_VMA: Vaddr = 0xffffffff00000000; + + let addr: Paddr; + + // We need to load the address of the symbol in assembly to avoid the + // linker relocation error. The symbol is not reachable using IP-offset + // addressing without the virtual offset. + unsafe { + core::arch::asm!( + "la {0}, ap_boot_start + {1}", + out(reg) addr, + const KERNEL_VMA, + ); + } + + addr - KERNEL_VMA +} + +fn get_bootstrap_hart_id() -> u32 { + // SAFETY: This function is only called after `riscv_boot` has written the + // ID. There must be a fence after the write if we are APs, and no other + // writes happen after that so it's safe to read. + unsafe { super::BOOTSTRAP_HART_ID } +} + +pub(in crate::arch) fn get_current_hart_id() -> u32 { + let id = AP_CURRENT_HART_ID.load(); + if id == u32::MAX { + // This function cannot be called before `riscv_ap_early_entry`, which + // is the entrypoint and initializes `AP_CURRENT_HART_ID`. So if the ID + // is not written we must be the BSP. + get_bootstrap_hart_id() + } else { + id + } +} + +cpu_local_cell! { + static AP_CURRENT_HART_ID: u32 = u32::MAX; +} + +// Since in RISC-V we cannot read the hart ID in S mode, the hart ID is +// delivered from the bootloader. We need to record the hart ID with another +// layer of entry point. +#[no_mangle] +unsafe extern "C" fn riscv_ap_early_entry(cpu_id: u32, hart_id: u32) -> ! 
{ + unsafe extern "C" { + fn ap_early_entry(cpu_id: u32) -> !; + } + + // CPU local memory could be accessed here since we are the AP and the BSP + // must have initialized it. + AP_CURRENT_HART_ID.store(hart_id); + + // SAFETY: This is valid to call and only called once. + unsafe { ap_early_entry(cpu_id) }; } diff --git a/ostd/src/arch/riscv/irq/ipi.rs b/ostd/src/arch/riscv/irq/ipi.rs index e7f1c2f42..c32c174fe 100644 --- a/ostd/src/arch/riscv/irq/ipi.rs +++ b/ostd/src/arch/riscv/irq/ipi.rs @@ -9,9 +9,9 @@ use crate::cpu::PinCurrentCpu; pub(crate) struct HwCpuId(u32); impl HwCpuId { - pub(crate) fn read_current(_guard: &dyn PinCurrentCpu) -> Self { - // TODO: Support SMP in RISC-V. - Self(0) + #[expect(unused_variables)] + pub(crate) fn read_current(guard: &dyn PinCurrentCpu) -> Self { + Self(crate::arch::boot::smp::get_current_hart_id()) } } diff --git a/ostd/src/arch/riscv/mod.rs b/ostd/src/arch/riscv/mod.rs index 6e6cf0b76..2d66adc71 100644 --- a/ostd/src/arch/riscv/mod.rs +++ b/ostd/src/arch/riscv/mod.rs @@ -60,7 +60,7 @@ pub(crate) unsafe fn late_init_on_bsp() { } pub(crate) unsafe fn init_on_ap() { - unimplemented!() + unimplemented!(); } /// Return the frequency of TSC. The unit is Hz. 
diff --git a/ostd/src/arch/riscv/trap/trap.S b/ostd/src/arch/riscv/trap/trap.S index 377feafbd..40f25167d 100644 --- a/ostd/src/arch/riscv/trap/trap.S +++ b/ostd/src/arch/riscv/trap/trap.S @@ -101,7 +101,7 @@ end_trap_from_user: LOAD_SP s10, 10 LOAD_SP s11, 11 LOAD_SP ra, 12 - # not callee-saved, but is used to store mhartid + # not callee-saved, but is used to store the CPU-local storage's base address LOAD_SP gp, 13 addi sp, sp, 14 * XLENB @@ -124,7 +124,7 @@ run_user: # (regs: &mut RawUserContext) STORE_SP s10, 10 STORE_SP s11, 11 STORE_SP ra, 12 - # not callee-saved, but is used to store mhartid + # not callee-saved, but is used to store the CPU-local storage's base address STORE_SP gp, 13 mv t0, sp diff --git a/ostd/src/boot/smp.rs b/ostd/src/boot/smp.rs index 695f534bb..1d8443a74 100644 --- a/ostd/src/boot/smp.rs +++ b/ostd/src/boot/smp.rs @@ -7,7 +7,7 @@ use alloc::{boxed::Box, collections::btree_map::BTreeMap, vec::Vec}; use spin::Once; use crate::{ - arch::{boot::smp::bringup_all_aps, irq::HwCpuId}, + arch::irq::HwCpuId, mm::{ frame::{meta::KernelMeta, Segment}, paddr_to_vaddr, FrameAllocOptions, HasPaddrRange, PAGE_SIZE, @@ -107,7 +107,7 @@ pub(crate) unsafe fn boot_all_aps() { let pt_ptr = crate::mm::page_table::boot_pt::with_borrow(|pt| pt.root_address()).unwrap(); // SAFETY: It's the right time to boot APs (guaranteed by the caller) and // the arguments are valid to boot APs (generated above). - unsafe { bringup_all_aps(info_ptr, pt_ptr, num_cpus as u32) }; + unsafe { crate::arch::boot::smp::bringup_all_aps(info_ptr, pt_ptr, num_cpus as u32) }; wait_for_all_aps_started(num_cpus); diff --git a/ostd/src/lib.rs b/ostd/src/lib.rs index 29e66ef51..524fa76aa 100644 --- a/ostd/src/lib.rs +++ b/ostd/src/lib.rs @@ -71,7 +71,6 @@ pub use self::{error::Error, prelude::Result}; // TODO: We need to refactor this function to make it more modular and // make inter-initialization-dependencies more clear and reduce usages of // boot stage only global variables. 
-#[doc(hidden)] unsafe fn init() { arch::enable_cpu_features(); @@ -90,9 +89,9 @@ unsafe fn init() { logger::init(); // SAFETY: - // 1. They are only called once in the boot context of the BSP. - // 2. The number of CPUs are available because ACPI has been initialized. - // 3. CPU-local storage has NOT been used. + // 1. They are only called once in the boot context of the BSP. + // 2. The number of CPUs are available because ACPI has been initialized. + // 3. CPU-local storage has NOT been used. unsafe { cpu::init_on_bsp() }; // SAFETY: We are on the BSP and APs are not yet started.