Add RISC-V SMP boot entrypoint

This commit is contained in:
Zhang Junyang 2025-08-25 19:58:32 +08:00 committed by Tate, Hongliang Tian
parent c77edbace8
commit 3b4569e14d
11 changed files with 358 additions and 28 deletions

View File

@ -54,7 +54,8 @@ build.strip_elf = false
qemu.args = """\
-cpu rv64,svpbmt=true \
-machine virt \
-m 8G \
-m ${MEM:-8G} \
-smp ${SMP:-1} \
--no-reboot \
-nographic \
-display none \

View File

@ -21,6 +21,11 @@ SECTIONS
KEEP(*(.boot.stack))
. = ALIGN(4096);
}
.ap_boot : AT(LOADADDR(.boot) + SIZEOF(.boot)) {
KEEP(*(.ap_boot))
. = ALIGN(4096);
}
# Then, we switch to virtual addresses for all the other sections.
. += KERNEL_VMA_OFFSET;

View File

@ -0,0 +1,122 @@
/* SPDX-License-Identifier: MPL-2.0 */
// The boot routine executed by application processors (APs) on RISC-V.
# SATP MODE field encodings. The MODE field occupies bits 63:60 of SATP.
SATP_MODE_SV39 = 8 << 60
SATP_MODE_SV48 = 9 << 60
# The PPN field of SATP starts at bit 0.
SATP_PPN_SHIFT = 0
PAGE_SHIFT = 12
# The fixed offset between the kernel's virtual and physical addresses.
KERNEL_VMA_OFFSET = 0xffffffff00000000
# This is to workaround <https://github.com/rust-lang/rust/issues/80608>.
.attribute arch, "rv64imac"
# The trampoline lives in its own `.ap_boot` section so that the linker
# script can place it at a known physical (load) address.
.section ".ap_boot", "awx", @progbits
.balign 4096
.global ap_boot_start
ap_boot_start:
# At this point:
# - a0 contains the hart ID;
# - a1 is the opaque parameter (not used here);
# - We're running in M-mode or S-mode depending on SBI implementation.
# Save hart ID in t4 for later use (t4 is caller-saved but we'll be careful).
# No calls happen between here and the final jump, so t4 survives.
mv t4, a0
# Load the page table address in Sv48 mode and set SATP.
# `lla` is PC-relative and we are still running at the physical address, so
# the virtual offset is subtracted to reach the `.data` symbol physically.
lla t0, __ap_boot_page_table_pointer - KERNEL_VMA_OFFSET
ld t1, 0(t0) # Load page table physical address.
li t2, SATP_MODE_SV48 # Sv48 mode.
srli t1, t1, PAGE_SHIFT - SATP_PPN_SHIFT
or t1, t1, t2
csrw satp, t1
# Check if the write to satp succeeds.
# Reference: <https://riscv.github.io/riscv-isa-manual/snapshot/privileged/#satp>.
csrr t3, satp
beq t3, t1, flush_tlb_ap
# This AP doesn't support Sv48. So the `__ap_boot_page_table_pointer` must
# point to a Sv39 page table since we assume that all harts support the same
# paging mode.
xor t1, t1, t2 # Clear previous mode bits (t2 still holds SATP_MODE_SV48).
li t2, SATP_MODE_SV39 # Sv39 mode.
or t1, t1, t2
csrw satp, t1
# Check again if the write to satp succeeds.
csrr t0, satp
beq t0, t1, flush_tlb_ap
# If the CPU doesn't support either Sv48 or Sv39 this is actually reachable.
unreachable_pa_ap:
j unreachable_pa_ap
flush_tlb_ap:
sfence.vma # Flush TLB.
# Now we need to switch to virtual addressing.
# Calculate virtual address using the same method as boot.S.
# Note: sp is used as a scratch register for the jump target here; the real
# stack pointer is loaded from the per-AP info array below.
li t1, KERNEL_VMA_OFFSET
lla sp, ap_boot_virtual - KERNEL_VMA_OFFSET
or sp, sp, t1
jr sp
# From here, we're in the canonical virtual address space instead of linear
# physical addresses.
.data
.balign 8
# These will be filled by the BSP before starting APs.
.global __ap_boot_page_table_pointer
__ap_boot_page_table_pointer:
.quad 0
.global __ap_boot_info_array_pointer
__ap_boot_info_array_pointer:
.quad 0
# This is atomically incremented when a new hart is kicked on. The BSP's ID is
# 0, and the first AP's ID will be 1.
__ap_boot_cpu_id_tail:
.quad 1
.text
ap_boot_virtual:
# Atomically update the CPU ID tail and load the previous value to t1.
lla t0, __ap_boot_cpu_id_tail
cmpxchg_load:
lr.d t1, (t0)
addi t2, t1, 1
# `sc.d` writes 0 to rd on success and non-zero on failure; retry on failure.
sc.d t2, t2, (t0)
bnez t2, cmpxchg_load
# Get the AP boot info array pointer using absolute addressing.
lla t2, __ap_boot_info_array_pointer
ld t2, 0(t2) # Load pointer to `PerApRawInfo` array.
# Each PerApRawInfo is 16 bytes (2 * 8-byte pointers).
# Calculate offset: (cpu_id - 1) * 16.
addi t3, t1, -1 # `cpu_id - 1` (BSP is cpu 0, first AP is cpu 1).
slli t3, t3, 4 # Multiply by 16.
add t3, t3, t2 # `t3 = &per_ap_raw_info[cpu_id - 1]`
# Load stack top and CPU local storage's base address.
ld sp, 0(t3) # Load stack_top.
ld gp, 8(t3) # Load cpu_local pointer.
# Clear frame pointer for clean stack traces.
li fp, 0
# Jump to Rust AP entry point.
# Pass CPU ID as the first argument.
mv a0, t1
# Pass the hardware hart ID as the second argument.
mv a1, t4
lla t1, riscv_ap_early_entry
jr t1
# Should be unreachable: `riscv_ap_early_entry` never returns.
unreachable_va_ap:
j unreachable_va_ap

View File

@ -1,5 +1,7 @@
/* SPDX-License-Identifier: MPL-2.0 */
// The boot routine executed by the bootstrap processor (BSP) on RISC-V.
SATP_MODE_SV39 = 8 << 60
SATP_MODE_SV48 = 9 << 60
SATP_PPN_SHIFT = 0
@ -21,7 +23,8 @@ KERNEL_VMA_OFFSET = 0xffffffff00000000
_start:
# Arguments passed from SBI:
# a0 = hart id
# a1 = device tree paddr (not touched)
# a1 = device tree paddr
# We do not touch them here. They are passed to the Rust entrypoint.
# Set up the Sv48 page table.
# sv48_boot_l4pt[511] = (PPN(sv48_boot_l3pt) << PTE_PPN_SHIFT) | PTE_V
@ -43,16 +46,24 @@ _start:
# Check if the write to satp succeeds. If not, try Sv39.
# Reference: <https://riscv.github.io/riscv-isa-manual/snapshot/privileged/#satp>.
csrr t1, satp
beq t0, t1, flush_tlb
beq t0, t1, flush_tlb_bsp
# Try loading the Sv39 page table.
la t0, sv39_boot_l3pt
lla t0, sv39_boot_l3pt
li t1, SATP_MODE_SV39
srli t0, t0, PAGE_SHIFT - SATP_PPN_SHIFT
or t0, t0, t1
csrw satp, t0
flush_tlb:
# Check again if the write to satp succeeds.
csrr t1, satp
beq t0, t1, flush_tlb_bsp
# If the CPU doesn't support either Sv48 or Sv39 this is actually reachable.
unreachable_pa_bsp:
j unreachable_pa_bsp
flush_tlb_bsp:
sfence.vma
# Update SP/PC to use the virtual address.
@ -105,14 +116,18 @@ boot_stack_bottom:
.skip 0x40000 # 256 KiB
boot_stack_top:
# From here, we're in the .text section: we no longer use physical address.
# From here, we no longer use physical address.
.text
_start_virt:
# Initialize GP to the CPU-local start address.
# Initialize GP to the CPU-local storage's base address.
.extern __cpu_local_start
lla gp, __cpu_local_start
# Jump into Rust code.
lla t0, riscv_boot
jr t0
unreachable_va_bsp:
j unreachable_va_bsp

View File

@ -104,11 +104,17 @@ fn parse_initramfs_range() -> Option<(usize, usize)> {
Some((initrd_start, initrd_end))
}
static mut BOOTSTRAP_HART_ID: u32 = u32::MAX;
/// The entry point of the Rust code portion of Asterinas.
#[no_mangle]
pub extern "C" fn riscv_boot(_hart_id: usize, device_tree_paddr: usize) -> ! {
pub extern "C" fn riscv_boot(hart_id: usize, device_tree_paddr: usize) -> ! {
early_println!("Enter riscv_boot");
// SAFETY: We only write it once this time. Other processors will only read
// it. And other processors are not booted yet so there's no races.
unsafe { BOOTSTRAP_HART_ID = hart_id as u32 };
let device_tree_ptr = paddr_to_vaddr(device_tree_paddr) as *const u8;
let fdt = unsafe { fdt::Fdt::from_ptr(device_tree_ptr).unwrap() };
DEVICE_TREE.call_once(|| fdt);

View File

@ -2,16 +2,198 @@
//! Multiprocessor Boot Support
use crate::{boot::smp::PerApRawInfo, mm::Paddr};
use core::arch::global_asm;
use crate::{
boot::smp::PerApRawInfo,
cpu_local_cell,
mm::{Paddr, Vaddr},
};
// Include the AP boot assembly code
global_asm!(include_str!("ap_boot.S"));
pub(crate) fn count_processors() -> Option<u32> {
Some(1)
let mut hart_count = 0;
for_each_hart_id(|_| hart_count += 1);
if hart_count == 0 {
None
} else {
Some(hart_count)
}
}
pub(crate) unsafe fn bringup_all_aps(
_info_ptr: *const PerApRawInfo,
_pr_ptr: Paddr,
_num_cpus: u32,
) {
unimplemented!()
/// Brings up all application processors.
///
/// Following the x86 naming, all the harts that are not the bootstrapping hart
/// are "application processors".
///
/// # Safety
///
/// The caller must ensure that
/// 1. we're in the boot context of the BSP,
/// 2. all APs have not yet been booted, and
/// 3. the arguments are valid to boot APs.
pub(crate) unsafe fn bringup_all_aps(info_ptr: *const PerApRawInfo, pt_ptr: Paddr, num_cpus: u32) {
if num_cpus <= 1 {
return; // No APs to bring up
}
// SAFETY: We ensure that the variables are properly defined in the assembly
// code and they are safe to write here.
unsafe {
fill_boot_info_ptr(info_ptr);
fill_boot_page_table_ptr(pt_ptr);
}
let bsp_id = get_bootstrap_hart_id();
log::info!("Bootstrapping hart is {}, booting all other harts", bsp_id);
for_each_hart_id(|hart_id| {
if hart_id != bsp_id {
// SAFETY: Each hart ID is iterated over exactly once here so we
// won't boot up one hart twice. Other safety constraints are
// upheld by the caller.
unsafe { bringup_ap(hart_id) };
}
});
}
/// Invokes `f` once for each hart ID enumerated from the device tree.
///
/// If the device tree is not yet available, only the bootstrapping hart's
/// ID is reported.
fn for_each_hart_id(mut f: impl FnMut(u32)) {
    match super::DEVICE_TREE.get() {
        None => f(get_bootstrap_hart_id()),
        Some(device_tree) => {
            for cpu_node in device_tree.cpus() {
                let is_cpu = cpu_node
                    .property("device_type")
                    .and_then(|device_type| device_type.as_str())
                    == Some("cpu");
                if !is_cpu {
                    continue;
                }
                if let Some(reg) = cpu_node.property("reg") {
                    f(reg.as_usize().unwrap() as u32);
                }
            }
        }
    }
}
/// # Safety
///
/// The caller must ensure that
/// 1. we're in the boot context of the BSP,
/// 2. the resources for APs' boot (e.g., stacks and the page table) are set
/// up correctly, and
/// 3. the `hart_id` hart hasn't booted.
unsafe fn bringup_ap(hart_id: u32) {
log::info!("Starting hart {}", hart_id);
// Use SBI to start the hart directly at the AP boot code
let result = sbi_rt::hart_start(
hart_id as usize,
get_ap_boot_start_addr(),
/* Unused */ 0,
);
if result.error == 0 {
log::debug!("Successfully started hart {}", hart_id);
} else {
log::error!(
"Failed to start hart {}: error code {}",
hart_id,
result.error
);
}
}
/// Fills the AP boot info array pointer.
///
/// The value is read by the trampoline in `ap_boot.S`
/// (`__ap_boot_info_array_pointer`) when each AP loads its stack top and
/// CPU-local storage base from the `PerApRawInfo` array.
///
/// # Safety
///
/// The caller must ensure that `__ap_boot_info_array_pointer` is safe to write.
unsafe fn fill_boot_info_ptr(info_ptr: *const PerApRawInfo) {
    extern "C" {
        // Defined in the `.data` section of `ap_boot.S`.
        static mut __ap_boot_info_array_pointer: *const PerApRawInfo;
    }

    // SAFETY: The safety conditions are upheld by the caller.
    unsafe {
        __ap_boot_info_array_pointer = info_ptr;
    }
}
/// Fills the AP boot page table pointer.
///
/// The value is read by the trampoline in `ap_boot.S`
/// (`__ap_boot_page_table_pointer`) and loaded into each AP's SATP register
/// before paging is enabled.
///
/// # Safety
///
/// The caller must ensure that `__ap_boot_page_table_pointer` is safe to write.
unsafe fn fill_boot_page_table_ptr(pt_ptr: Paddr) {
    extern "C" {
        // Defined in the `.data` section of `ap_boot.S`.
        static mut __ap_boot_page_table_pointer: Paddr;
    }

    // SAFETY: The safety conditions are upheld by the caller.
    unsafe {
        __ap_boot_page_table_pointer = pt_ptr;
    }
}
/// Returns the physical address of the AP boot trampoline (`ap_boot_start`),
/// i.e., the address passed to SBI `hart_start` as the resume PC.
fn get_ap_boot_start_addr() -> Paddr {
    const KERNEL_VMA: Vaddr = 0xffffffff00000000;

    let addr: Paddr;
    // We need to load the address of the symbol in assembly to avoid the
    // linker relocation error. The symbol is not reachable using IP-offset
    // addressing without the virtual offset.
    //
    // `ap_boot_start` is linked at its physical (load) address, so adding
    // `KERNEL_VMA` brings it into PC-relative range of the running (virtual)
    // code; subtracting it back below recovers the physical address.
    unsafe {
        core::arch::asm!(
            "la {0}, ap_boot_start + {1}",
            out(reg) addr,
            const KERNEL_VMA,
        );
    }
    addr - KERNEL_VMA
}
/// Returns the hardware hart ID of the bootstrapping processor (BSP).
fn get_bootstrap_hart_id() -> u32 {
    // SAFETY: This function is only called after `riscv_boot` has written the
    // ID. There must be a fence after the write if we are APs, and no other
    // writes happen after that so it's safe to read.
    unsafe { super::BOOTSTRAP_HART_ID }
}
/// Returns the hardware hart ID of the current processor.
pub(in crate::arch) fn get_current_hart_id() -> u32 {
    match AP_CURRENT_HART_ID.load() {
        // `AP_CURRENT_HART_ID` is initialized by `riscv_ap_early_entry`,
        // the AP entrypoint, before anything else on an AP can run. So if it
        // still holds the initial value, we must be running on the BSP.
        u32::MAX => get_bootstrap_hart_id(),
        hart_id => hart_id,
    }
}
cpu_local_cell! {
    // The hardware hart ID of the current AP, recorded by
    // `riscv_ap_early_entry`. `u32::MAX` means "not yet written", which
    // only ever holds on the BSP.
    static AP_CURRENT_HART_ID: u32 = u32::MAX;
}
// Since in RISC-V we cannot read the hart ID in S mode, the hart ID is
// delivered from the bootloader. We need to record the hart ID with another
// layer of entry point.
//
// This is jumped to from `ap_boot.S` with `a0` = the kernel-assigned CPU ID
// and `a1` = the hardware hart ID.
#[no_mangle]
unsafe extern "C" fn riscv_ap_early_entry(cpu_id: u32, hart_id: u32) -> ! {
    unsafe extern "C" {
        fn ap_early_entry(cpu_id: u32) -> !;
    }

    // CPU local memory could be accessed here since we are the AP and the BSP
    // must have initialized it.
    AP_CURRENT_HART_ID.store(hart_id);

    // SAFETY: This is valid to call and only called once.
    unsafe { ap_early_entry(cpu_id) };
}

View File

@ -9,9 +9,9 @@ use crate::cpu::PinCurrentCpu;
pub(crate) struct HwCpuId(u32);
impl HwCpuId {
pub(crate) fn read_current(_guard: &dyn PinCurrentCpu) -> Self {
// TODO: Support SMP in RISC-V.
Self(0)
#[expect(unused_variables)]
pub(crate) fn read_current(guard: &dyn PinCurrentCpu) -> Self {
Self(crate::arch::boot::smp::get_current_hart_id())
}
}

View File

@ -60,7 +60,7 @@ pub(crate) unsafe fn late_init_on_bsp() {
}
pub(crate) unsafe fn init_on_ap() {
    // Not yet implemented on RISC-V APs.
    unimplemented!();
}
/// Return the frequency of TSC. The unit is Hz.

View File

@ -101,7 +101,7 @@ end_trap_from_user:
LOAD_SP s10, 10
LOAD_SP s11, 11
LOAD_SP ra, 12
# not callee-saved, but is used to store mhartid
# not callee-saved, but is used to store the CPU-local storage's base address
LOAD_SP gp, 13
addi sp, sp, 14 * XLENB
@ -124,7 +124,7 @@ run_user: # (regs: &mut RawUserContext)
STORE_SP s10, 10
STORE_SP s11, 11
STORE_SP ra, 12
# not callee-saved, but is used to store mhartid
# not callee-saved, but is used to store the CPU-local storage's base address
STORE_SP gp, 13
mv t0, sp

View File

@ -7,7 +7,7 @@ use alloc::{boxed::Box, collections::btree_map::BTreeMap, vec::Vec};
use spin::Once;
use crate::{
arch::{boot::smp::bringup_all_aps, irq::HwCpuId},
arch::irq::HwCpuId,
mm::{
frame::{meta::KernelMeta, Segment},
paddr_to_vaddr, FrameAllocOptions, HasPaddrRange, PAGE_SIZE,
@ -107,7 +107,7 @@ pub(crate) unsafe fn boot_all_aps() {
let pt_ptr = crate::mm::page_table::boot_pt::with_borrow(|pt| pt.root_address()).unwrap();
// SAFETY: It's the right time to boot APs (guaranteed by the caller) and
// the arguments are valid to boot APs (generated above).
unsafe { bringup_all_aps(info_ptr, pt_ptr, num_cpus as u32) };
unsafe { crate::arch::boot::smp::bringup_all_aps(info_ptr, pt_ptr, num_cpus as u32) };
wait_for_all_aps_started(num_cpus);

View File

@ -71,7 +71,6 @@ pub use self::{error::Error, prelude::Result};
// TODO: We need to refactor this function to make it more modular and
// make inter-initialization-dependencies more clear and reduce usages of
// boot stage only global variables.
#[doc(hidden)]
unsafe fn init() {
arch::enable_cpu_features();
@ -90,9 +89,9 @@ unsafe fn init() {
logger::init();
// SAFETY:
// 1. They are only called once in the boot context of the BSP.
// 2. The number of CPUs are available because ACPI has been initialized.
// 3. CPU-local storage has NOT been used.
// 1. They are only called once in the boot context of the BSP.
// 2. The number of CPUs are available because ACPI has been initialized.
// 3. CPU-local storage has NOT been used.
unsafe { cpu::init_on_bsp() };
// SAFETY: We are on the BSP and APs are not yet started.