Separate VMAR load and activation

Ruihan Li 2025-12-12 23:59:43 +08:00
parent e2f7b0eb73
commit 4fdf400d2a
3 changed files with 252 additions and 32 deletions
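In outline, the commit splits what used to be a single unshare-and-renew step into two phases: the ELF is first loaded into a freshly built, not-yet-active VMAR, and only after loading succeeds is that VMAR activated in the current context. Below is a minimal sketch of the new ordering; the types and function bodies are illustrative stand-ins, not the actual Asterinas code.

use std::sync::Arc;

struct Vmar;
struct ElfLoadInfo;

// Stand-ins for the real new_vmar_and_map / load_to_vmar / activate_vmar.
fn new_vmar_and_map() -> Arc<Vmar> { Arc::new(Vmar) }
fn load_to_vmar(_vmar: &Vmar) -> Result<ElfLoadInfo, i32> { Ok(ElfLoadInfo) }
fn activate_vmar(_vmar: Arc<Vmar>) {}

fn do_execve_sketch() -> Result<(), i32> {
    // Phase 1: load into a detached VMAR. A failure here leaves the old
    // address space intact, so it can be reported back to user space.
    let new_vmar = new_vmar_and_map();
    let _elf_load_info = load_to_vmar(&new_vmar)?;

    // Phase 2: the point of no return. The current context switches to the
    // new VMAR; any later failure is fatal to the process.
    activate_vmar(new_vmar);
    Ok(())
}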

View File

@@ -8,16 +8,14 @@ use ostd::{
user::UserContextApi,
};
use super::process_vm::activate_vmar;
use crate::{
fs::{
fs_resolver::{FsResolver, PathOrInode},
utils::Inode,
},
fs::{fs_resolver::PathOrInode, utils::Inode},
prelude::*,
process::{
ContextUnshareAdminApi, Credentials, Process,
posix_thread::{PosixThread, ThreadLocal, ThreadName, sigkill_other_threads, thread_table},
process_vm::{MAX_LEN_STRING_ARG, MAX_NR_STRING_ARGS, unshare_and_renew_vmar},
process_vm::{MAX_LEN_STRING_ARG, MAX_NR_STRING_ARGS, new_vmar_and_map},
program_loader::{ProgramToLoad, elf::ElfLoadInfo},
signal::{
HandlePendingSignal, PauseReason, SigStack,
@@ -25,6 +23,7 @@ use crate::{
signals::kernel::KernelSignal,
},
},
vm::vmar::Vmar,
};
pub fn do_execve(
@@ -55,6 +54,9 @@ pub fn do_execve(
let program_to_load =
ProgramToLoad::build_from_inode(elf_inode.clone(), &fs_resolver, argv, envp)?;
let new_vmar = new_vmar_and_map(elf_file.clone());
let elf_load_info = program_to_load.load_to_vmar(new_vmar.as_ref(), &fs_resolver)?;
// Ensure no other thread is concurrently performing exit_group or execve.
// If such an operation is in progress, return EAGAIN.
let mut task_set = ctx.process.tasks().lock();
@@ -73,7 +75,7 @@ pub fn do_execve(
// After this point, failures in subsequent operations are fatal: the process
// state may be left inconsistent and it can never return to user mode.
let res = do_execve_no_return(ctx, user_context, elf_file, &fs_resolver, program_to_load);
let res = do_execve_no_return(ctx, user_context, elf_file, new_vmar, &elf_load_info);
if res.is_err() {
ctx.posix_thread
@@ -127,8 +129,8 @@ fn do_execve_no_return(
ctx: &Context,
user_context: &mut UserContext,
elf_file: PathOrInode,
fs_resolver: &FsResolver,
program_to_load: ProgramToLoad,
new_vmar: Arc<Vmar>,
elf_load_info: &ElfLoadInfo,
) -> Result<()> {
let Context {
process,
@@ -142,20 +144,16 @@ fn do_execve_no_return(
wait_other_threads_exit(ctx)?;
thread_table::make_current_main_thread(ctx);
let elf_load_info = {
let mut vmar = ctx.process.lock_vmar();
// Reset the virtual memory state.
unshare_and_renew_vmar(ctx, &mut vmar, elf_file.clone());
// Load the binary into the process's address space
program_to_load.load_to_vmar(vmar.unwrap(), fs_resolver)?
};
// Activate the new VMAR, into which the ELF has been loaded, in the current context.
activate_vmar(ctx, new_vmar);
// After the program has been loaded, the virtual memory of the current process
// has been reinitialized. Hence, the previously recorded robust list must be cleared.
*thread_local.robust_list().borrow_mut() = None;
thread_local.clear_child_tid().set(0);
// Set up the CPU context.
set_cpu_context(thread_local, user_context, &elf_load_info);
set_cpu_context(thread_local, user_context, elf_load_info);
// Apply file-capability changes.
apply_caps_from_exec(process, posix_thread, elf_file.inode())?;

View File

@@ -213,23 +213,14 @@ pub(super) fn new_vmar_and_map(executable_file: PathOrInode) -> Arc<Vmar> {
new_vmar
}
/// Unshares and renews the [`Vmar`] of the current process.
pub(super) fn unshare_and_renew_vmar(
ctx: &Context,
vmar: &mut ProcessVmarGuard,
executable_file: PathOrInode,
) {
let new_vmar = Vmar::new(ProcessVm::new(executable_file));
let guard = disable_preempt();
/// Activates the [`Vmar`] in the current process's context.
pub(super) fn activate_vmar(ctx: &Context, new_vmar: Arc<Vmar>) {
let mut vmar_guard = ctx.process.lock_vmar();
// Disable preemption because `thread_local::vmar()` will be borrowed during a context switch.
let _preempt_guard = disable_preempt();
*ctx.thread_local.vmar().borrow_mut() = Some(new_vmar.clone());
new_vmar.vm_space().activate();
vmar.set_vmar(Some(new_vmar));
drop(guard);
let new_vmar = vmar.unwrap();
new_vmar
.process_vm()
.heap()
.alloc_and_map(new_vmar)
.unwrap();
vmar_guard.set_vmar(Some(new_vmar));
}
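The ordering inside the new activate_vmar is the delicate part: the per-thread VMAR cell and the active VmSpace are updated within a single preemption-disabled window, because `thread_local::vmar()` is borrowed during a context switch. The following schematic restates that invariant; PreemptGuard, VmSpace, and the thread-local cell are placeholders, not the real ostd primitives.

use std::cell::RefCell;
use std::sync::Arc;

struct VmSpace;
impl VmSpace {
    fn activate(&self) { /* would install this space's page table root */ }
}
struct Vmar {
    vm_space: VmSpace,
}

thread_local! {
    // Stand-in for the per-thread VMAR cell that a context switch consults.
    static CURRENT_VMAR: RefCell<Option<Arc<Vmar>>> = RefCell::new(None);
}

struct PreemptGuard; // placeholder for the guard returned by disable_preempt()

fn activate_vmar_sketch(new_vmar: Arc<Vmar>) {
    // Keep preemption off across both updates: a context switch between them
    // would observe a thread-local record that disagrees with the VmSpace
    // that is actually active.
    let _guard = PreemptGuard;
    CURRENT_VMAR.with(|cell| *cell.borrow_mut() = Some(new_vmar.clone()));
    new_vmar.vm_space.activate();
}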

View File

@@ -187,6 +187,237 @@ FN_TEST(interp_too_long)
}
END_TEST()
FN_TEST(interp_missing_nul)
{
unsigned int i;
i = push_interp("/dev/zero");
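// Drop the trailing NUL byte from the interpreter path string.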
--elf.phdr[i].p_filesz;
TEST_ERRNO(do_execve(), ENOEXEC);
pop_interp();
}
END_TEST()
FN_TEST(interp_trunc_eof)
{
unsigned int i;
i = push_interp("/dev/zero");
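// Point the interpreter string at the last byte of the image so the read runs into EOF.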
elf.phdr[i].p_offset = sizeof(elf) - 1;
TEST_ERRNO(do_execve(), EIO);
pop_interp();
}
END_TEST()
FN_TEST(interp_overflow_end)
{
unsigned int i;
int j;
i = push_interp("/dev/zero");
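// Park the interpreter string near the top of the 64-bit offset space so
// that p_offset + p_filesz reaches or wraps past the limit.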
elf.phdr[i].p_offset = ~(Elf64_Off)0 - 10;
for (j = 9; j <= 12; ++j) {
elf.phdr[i].p_filesz = j;
TEST_ERRNO(do_execve(), EINVAL);
}
pop_interp();
}
END_TEST()
FN_TEST(interp_doesnt_exist)
{
push_interp("/tmp/file_doesnt_exist");
TEST_ERRNO(do_execve(), ENOENT);
pop_interp();
}
END_TEST()
FN_TEST(interp_bad_perm)
{
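// /dev/zero exists but carries no execute permission.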
push_interp("/dev/zero");
TEST_ERRNO(do_execve(), EACCES);
pop_interp();
}
END_TEST()
FN_TEST(interp_bad_format)
{
int fd;
push_interp("/tmp/my_lib");
fd = TEST_SUCC(open("/tmp/my_lib", O_WRONLY | O_CREAT | O_TRUNC, 0755));
TEST_RES(write(fd, "#!", 2), _ret == 2);
TEST_SUCC(close(fd));
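// Two bytes cannot hold a full ELF header, so loading the interpreter fails early.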
TEST_ERRNO(do_execve(), EIO);
TEST_SUCC(truncate("/tmp/my_lib", PAGE_SIZE));
TEST_ERRNO(do_execve(), ELIBBAD);
push_interp("/tmp/my_lib");
TEST_ERRNO(do_execve(), ELIBBAD);
pop_interp();
TEST_SUCC(unlink("/tmp/my_lib"));
pop_interp();
}
END_TEST()
// FIXME: Linux drops the old MM before creating new mappings. Any failure
// during the creation of the new mappings results in a fatal signal, so the
// error code is lost. Asterinas currently attempts to return these error
// codes to user space.
#ifdef __asterinas__
static int do_execve_fatal(void)
{
return do_execve();
}
#else /* __asterinas__ */
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/syscall.h>
static int do_execve_fatal(void)
{
pid_t pid;
int status;
struct user_regs_struct regs;
write_all(EXE_PATH, &elf, sizeof(elf));
pid = CHECK(fork());
if (pid == 0) {
CHECK(ptrace(PTRACE_TRACEME));
CHECK(raise(SIGSTOP));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnonnull"
CHECK(execve(EXE_PATH, NULL, NULL));
#pragma GCC diagnostic pop
exit(EXIT_FAILURE);
}
CHECK_WITH(wait(&status), _ret == pid && WIFSTOPPED(status) &&
WSTOPSIG(status) == SIGSTOP);
// Wait until `execve` starts.
CHECK(ptrace(PTRACE_SYSCALL, pid, NULL, NULL));
CHECK_WITH(wait(&status), _ret == pid && WIFSTOPPED(status) &&
WSTOPSIG(status) == SIGTRAP);
// Wait until `execve` completes.
CHECK(ptrace(PTRACE_SYSCALL, pid, NULL, NULL));
CHECK_WITH(wait(&status), _ret == pid && WIFSTOPPED(status) &&
WSTOPSIG(status) == SIGTRAP);
// Get `execve` results.
CHECK_WITH(ptrace(PTRACE_GETREGS, pid, NULL, &regs),
_ret >= 0 && regs.orig_rax == __NR_execve);
CHECK(ptrace(PTRACE_DETACH, pid, NULL, NULL));
CHECK_WITH(wait(&status), _ret == pid && WIFSIGNALED(status) &&
WTERMSIG(status) == SIGSEGV);
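// Raw syscalls report failure by returning -errno in RAX.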
errno = -regs.rax;
return errno == 0 ? 0 : -1;
}
#endif /* __asterinas__ */
FN_TEST(unaligned_offset)
{
++elf.phdr[0].p_offset;
TEST_ERRNO(do_execve_fatal(), EINVAL);
--elf.phdr[0].p_offset;
}
END_TEST()
FN_TEST(unaligned_vaddr)
{
++elf.phdr[0].p_vaddr;
TEST_ERRNO(do_execve_fatal(), EINVAL);
--elf.phdr[0].p_vaddr;
}
END_TEST()
FN_TEST(overflow_offset_plus_filesz)
{
long old;
old = elf.phdr[0].p_offset;
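// Push the page-aligned part of p_offset to the top of the 64-bit range
// while keeping the in-page bits, so p_offset + p_filesz wraps around.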
elf.phdr[0].p_offset = (~(Elf64_Off)0 & ~(PAGE_SIZE - 1)) +
(elf.phdr[0].p_offset & (PAGE_SIZE - 1));
TEST_ERRNO(do_execve_fatal(), EINVAL);
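// One page lower the sum no longer wraps, but the offset is still far past
// any real file, which surfaces as EOVERFLOW.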
elf.phdr[0].p_offset -= PAGE_SIZE;
TEST_ERRNO(do_execve_fatal(), EOVERFLOW);
elf.phdr[0].p_offset = old;
}
END_TEST()
FN_TEST(overflow_vaddr_plus_memsz)
{
int i;
long old;
old = elf.phdr[0].p_memsz;
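// Start with p_vaddr + p_memsz == UINT64_MAX, then step the sum past the
// wraparound point.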
elf.phdr[0].p_memsz = ~(Elf64_Xword)0 - elf.phdr[0].p_vaddr;
for (i = 0; i < 3; ++i) {
TEST_ERRNO(do_execve_fatal(), ENOMEM);
++elf.phdr[0].p_memsz;
}
elf.phdr[0].p_memsz = old;
}
END_TEST()
FN_TEST(underflow_vaddr)
{
long old;
old = elf.phdr[0].p_vaddr;
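// PAGE_SIZE is below the lowest address user mappings may occupy.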
elf.phdr[0].p_vaddr = PAGE_SIZE;
TEST_ERRNO(do_execve_fatal(), EPERM);
elf.phdr[0].p_vaddr = old;
}
END_TEST()
FN_TEST(memsz_larger_than_filesz)
{
// It's okay for `p_memsz` to be larger than `p_filesz`.
// However, the trailing part must be zeroed out. This is
// an example of when zeroing fails.
elf.phdr[0].p_filesz += PAGE_SIZE - 1;
elf.phdr[0].p_memsz += PAGE_SIZE;
// FIXME: This fails in Linux. However, Asterinas inserts
// zero pages at the end of private mappings, so it will
// succeed. See
// <https://github.com/asterinas/asterinas/blob/9c4f644bd9287da1815a13115fbdfa914d8426f0/kernel/src/vm/vmar/vm_mapping.rs#L443-L447>.
#ifndef __asterinas__
TEST_ERRNO(do_execve_fatal(), EFAULT);
#endif
elf.phdr[0].p_memsz -= PAGE_SIZE;
elf.phdr[0].p_filesz -= PAGE_SIZE - 1;
}
END_TEST()
FN_TEST(filesz_larger_than_memsz)
{
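// A loadable segment's p_filesz may never exceed its p_memsz.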
--elf.phdr[0].p_memsz;
TEST_ERRNO(do_execve_fatal(), EINVAL);
++elf.phdr[0].p_memsz;
}
END_TEST()
FN_SETUP(cleanup)
{
CHECK(unlink(EXE_PATH));