Add `MemfdInode`

This commit is contained in:
Wang Siyuan 2025-10-30 07:54:37 +00:00 committed by Ruihan Li
parent 6e8dac0c36
commit 7aa6a47612
4 changed files with 173 additions and 57 deletions

View File

@ -33,7 +33,7 @@ use crate::{
prelude::*,
process::{signal::PollHandle, Gid, Uid},
time::clocks::RealTimeCoarseClock,
vm::vmo::Vmo,
vm::{memfd::MemfdInode, vmo::Vmo},
};
/// A volatile file system whose data and metadata exists only in memory.
@ -93,7 +93,7 @@ impl FileSystem for RamFs {
}
/// An inode of `RamFs`.
struct RamInode {
pub struct RamInode {
/// Inode inner specifics
inner: Inner,
/// Inode metadata
@ -147,6 +147,10 @@ impl Inner {
Self::NamedPipe(NamedPipe::new().unwrap())
}
/// Builds the `File` variant around a page cache created from the given weak
/// `MemfdInode` reference.
///
/// # Panics
///
/// Panics if `PageCache::new` does not succeed, because its result is unwrapped.
fn new_file_in_memfd(this: Weak<MemfdInode>) -> Self {
    Self::File(PageCache::new(this).unwrap())
}
fn as_direntry(&self) -> Option<&RwLock<DirEntry>> {
match self {
Self::Dir(dir_entry) => Some(dir_entry),
@ -425,17 +429,22 @@ impl RamInode {
})
}
fn new_file_detached(mode: InodeMode, uid: Uid, gid: Gid) -> Arc<Self> {
Arc::new_cyclic(|weak_self| RamInode {
inner: Inner::new_file(weak_self.clone()),
fn new_file_detached_in_memfd(
weak_self: &Weak<MemfdInode>,
mode: InodeMode,
uid: Uid,
gid: Gid,
) -> Self {
Self {
inner: Inner::new_file_in_memfd(weak_self.clone()),
metadata: SpinLock::new(InodeMeta::new(mode, uid, gid)),
ino: weak_self.as_ptr() as u64,
typ: InodeType::File,
this: weak_self.clone(),
this: Weak::new(),
fs: Weak::new(),
extension: Extension::new(),
xattr: RamXattr::new(),
})
}
}
fn new_symlink(fs: &Arc<RamFs>, mode: InodeMode, uid: Uid, gid: Gid) -> Arc<Self> {
@ -1243,11 +1252,16 @@ impl Inode for RamInode {
}
}
/// Creates a RAM inode that is detached from any `RamFs`.
/// Creates a RAM inode that is detached from any `RamFs`, and resides in a `MemfdInode`.
///
// TODO: Add "anonymous inode fs" and link the inode to it.
pub fn new_detached_inode(mode: InodeMode, uid: Uid, gid: Gid) -> Arc<dyn Inode> {
RamInode::new_file_detached(mode, uid, gid)
pub fn new_detached_inode_in_memfd(
    weak_self: &Weak<MemfdInode>,
    mode: InodeMode,
    uid: Uid,
    gid: Gid,
) -> RamInode {
    // Public entry point for code outside this module; every argument is forwarded
    // unchanged to the private constructor on `RamInode`.
    RamInode::new_file_detached_in_memfd(weak_self, mode, uid, gid)
}
fn write_lock_two_direntries_by_ino<'a>(

View File

@ -3,7 +3,7 @@
//! Ramfs based on PageCache
use fs::RamFsType;
pub use fs::{new_detached_inode, RamFs};
pub use fs::{new_detached_inode_in_memfd, RamFs, RamInode};
mod fs;
mod xattr;

View File

@ -1,25 +1,12 @@
// SPDX-License-Identifier: MPL-2.0
use bitflags::bitflags;
use super::SyscallReturn;
use crate::{
fs::file_table::FdFlags,
prelude::*,
vm::memfd::{MemfdFile, MAX_MEMFD_NAME_LEN},
vm::memfd::{MemfdFile, MemfdFlags, MAX_MEMFD_NAME_LEN},
};
bitflags! {
struct MemfdFlags: u32 {
/// Close on exec.
const MFD_CLOEXEC = 1 << 0;
/// Allow sealing operations on this file.
const MFD_ALLOW_SEALING = 1 << 1;
/// Create in the hugetlbfs.
const MFD_HUGETLB = 1 << 2;
}
}
pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result<SyscallReturn> {
// FIXME: When `name` is too long, `read_cstring` returns `EFAULT`. However,
// according to <https://man7.org/linux/man-pages/man2/memfd_create.2.html>,
@ -29,8 +16,6 @@ pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result<S
.read_cstring(name_addr, MAX_MEMFD_NAME_LEN + 1)?;
debug!("sys_memfd_create: name = {:?}, flags = {}", name, flags);
let memfd_file = MemfdFile::new(name.to_string_lossy().as_ref())?;
let fd = {
let memfd_flags = MemfdFlags::from_bits(flags).ok_or(Errno::EINVAL)?;
let fd_flags = if memfd_flags.contains(MemfdFlags::MFD_CLOEXEC) {
@ -45,6 +30,16 @@ pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result<S
if memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING) {
warn!("sealing not supported");
}
if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL | MemfdFlags::MFD_EXEC) {
return_errno_with_message!(
Errno::EINVAL,
"`MFD_EXEC` and `MFD_NOEXEC_SEAL` cannot be specified together"
);
}
let memfd_file = MemfdFile::new(name.to_string_lossy().as_ref(), memfd_flags)?;
file_table_locked.insert(Arc::new(memfd_file), fd_flags)
};

View File

@ -3,19 +3,25 @@
//! Memfd Implementation.
use alloc::format;
use core::sync::atomic::{AtomicU32, Ordering};
use core::{
sync::atomic::{AtomicU32, Ordering},
time::Duration,
};
use aster_block::bio::BioWaiter;
use inherit_methods_macro::inherit_methods;
use spin::Once;
use crate::{
events::IoEvents,
fs::{
file_handle::{FileLike, Mappable},
inode_handle::{do_fallocate_util, do_resize_util, do_seek_util},
ramfs::new_detached_inode,
ramfs::{new_detached_inode_in_memfd, RamFs, RamInode},
utils::{
mkmod, AccessMode, FallocMode, Inode, InodeMode, IoctlCmd, Metadata, SeekFrom,
StatusFlags,
chmod, mkmod, AccessMode, CachePage, Extension, FallocMode, FileSystem, Inode,
InodeMode, InodeType, IoctlCmd, Metadata, PageCacheBackend, SeekFrom, StatusFlags,
XattrName, XattrNamespace, XattrSetFlags,
},
},
prelude::*,
@ -23,6 +29,7 @@ use crate::{
signal::{PollHandle, Pollable},
Gid, Uid,
},
vm::vmo::Vmo,
};
/// Maximum file name length for `memfd_create`, excluding the final `\0` byte.
@ -30,48 +37,133 @@ use crate::{
/// See <https://man7.org/linux/man-pages/man2/memfd_create.2.html>
pub const MAX_MEMFD_NAME_LEN: usize = 249;
pub struct MemfdFile {
inode: Arc<dyn Inode>,
/// An inode that backs a memfd file.
///
/// It holds a `RamInode` by value; the `PageCacheBackend` and `Inode` implementations
/// for this type forward to that inner inode.
pub struct MemfdInode {
    /// The wrapped RAM inode that the trait implementations forward to.
    inode: RamInode,
    /// The name string built by `MemfdFile::new` (formatted as `/memfd:<name>`).
    // Stored but not read anywhere yet, hence the `dead_code` expectation.
    #[expect(dead_code)]
    name: String,
}
// Every method below is declared without a body; `inherit_methods` fills each one in
// by forwarding the call to `self.inode`.
#[inherit_methods(from = "self.inode")]
impl PageCacheBackend for MemfdInode {
    fn read_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn write_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn npages(&self) -> usize;
}
// Bodiless declarations are completed by `inherit_methods`, which forwards each call to
// `self.inode`. Methods that carry an explicit body are written out by hand.
#[inherit_methods(from = "self.inode")]
impl Inode for MemfdInode {
    // Identity and general metadata.
    fn ino(&self) -> u64;
    fn type_(&self) -> InodeType;
    fn metadata(&self) -> Metadata;
    fn size(&self) -> usize;
    fn extension(&self) -> Option<&Extension>;

    // Timestamps.
    fn atime(&self) -> Duration;
    fn set_atime(&self, time: Duration);
    fn mtime(&self) -> Duration;
    fn set_mtime(&self, time: Duration);
    fn ctime(&self) -> Duration;
    fn set_ctime(&self, time: Duration);

    // Permissions and ownership (`set_mode` is spelled out further below).
    fn mode(&self) -> Result<InodeMode>;
    fn owner(&self) -> Result<Uid>;
    fn set_owner(&self, uid: Uid) -> Result<()>;
    fn group(&self) -> Result<Gid>;
    fn set_group(&self, gid: Gid) -> Result<()>;

    // Data access, polling and device control.
    fn page_cache(&self) -> Option<Arc<Vmo>>;
    fn read_at(&self, offset: usize, writer: &mut VmWriter) -> Result<usize>;
    fn read_direct_at(&self, offset: usize, writer: &mut VmWriter) -> Result<usize>;
    fn write_direct_at(&self, offset: usize, reader: &mut VmReader) -> Result<usize>;
    fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents;
    fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32>;

    // Extended attributes.
    fn get_xattr(&self, name: XattrName, value_writer: &mut VmWriter) -> Result<usize>;
    fn list_xattr(&self, namespace: XattrNamespace, list_writer: &mut VmWriter) -> Result<usize>;
    fn remove_xattr(&self, name: XattrName) -> Result<()>;
    fn set_xattr(
        &self,
        name: XattrName,
        value_reader: &mut VmReader,
        flags: XattrSetFlags,
    ) -> Result<()>;

    // The mutating operations below are explicit delegations to the inner inode rather
    // than inherited declarations.
    // NOTE(review): presumably kept explicit so memfd-specific checks can be added
    // later; confirm with the author.

    /// Sets the mode bits on the inner RAM inode.
    fn set_mode(&self, mode: InodeMode) -> Result<()> {
        self.inode.set_mode(mode)
    }

    /// Writes from `reader` at `offset` through the inner RAM inode.
    fn write_at(&self, offset: usize, reader: &mut VmReader) -> Result<usize> {
        self.inode.write_at(offset, reader)
    }

    /// Resizes the inner RAM inode to `new_size`.
    fn resize(&self, new_size: usize) -> Result<()> {
        self.inode.resize(new_size)
    }

    /// Runs the requested `fallocate` operation on the inner RAM inode.
    fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
        self.inode.fallocate(mode, offset, len)
    }

    /// Returns a lazily created `RamFs` instance that is shared by every call.
    fn fs(&self) -> Arc<dyn FileSystem> {
        // FIXME: Implement `AnonInodeFs` properly and link memfd inodes to it.
        static ANON_INODE_FS: Once<Arc<RamFs>> = Once::new();

        // The first caller runs `RamFs::new`; later callers get the stored instance.
        let shared_fs = ANON_INODE_FS.call_once(RamFs::new);
        shared_fs.clone()
    }
}
/// A `FileLike` object backed by a memfd inode.
pub struct MemfdFile {
    /// The inode behind this file; `MemfdFile::new` always stores a `MemfdInode` here.
    memfd_inode: Arc<dyn Inode>,
    /// The current file position (handed to `do_seek_util` by `seek`).
    offset: Mutex<usize>,
    /// The access mode; `MemfdFile::new` sets it to `O_RDWR`.
    access_mode: AccessMode,
    /// The raw status-flag bits; `MemfdFile::new` initializes them to zero.
    status_flags: AtomicU32,
}
impl MemfdFile {
pub fn new(name: &str) -> Result<Self> {
pub fn new(name: &str, memfd_flags: MemfdFlags) -> Result<Self> {
if name.len() > MAX_MEMFD_NAME_LEN {
return_errno_with_message!(Errno::EINVAL, "MemfdManager: `name` is too long.");
}
// When Linux performs `memfd_create`, it first creates a RAM inode in a ramfs,
// then immediately unlinks it, and finally returns only the file descriptor.
// Therefore, when using `readlink("/proc/<pid>/fd/<fd>", ...)` to get the file
// path of a `memfd` file, the result will have a `(deleted)` suffix. We stay
// consistent with Linux here.
//
// Reference: <https://github.com/torvalds/linux/blob/379f604cc3dc2c865dc2b13d81faa166b6df59ec/mm/shmem.c#L5803-L5837>
let name = format!("/memfd:{} (deleted)", name);
let inode = new_detached_inode(mkmod!(a+rwx), Uid::new_root(), Gid::new_root());
let name = format!("/memfd:{}", name);
let (allow_sealing, executable) = if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL) {
(true, false)
} else {
(memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING), true)
};
let mode = if executable {
mkmod!(a+rwx)
} else {
mkmod!(a+rw)
};
let memfd_inode = Arc::new_cyclic(|weak_self| {
let ram_inode =
new_detached_inode_in_memfd(weak_self, mode, Uid::new_root(), Gid::new_root());
MemfdInode {
inode: ram_inode,
name,
}
});
Ok(Self {
inode,
name,
memfd_inode,
offset: Mutex::new(0),
access_mode: AccessMode::O_RDWR,
status_flags: AtomicU32::new(0),
})
}
}
impl Pollable for MemfdFile {
fn poll(&self, mask: IoEvents, _poller: Option<&mut PollHandle>) -> IoEvents {
(IoEvents::IN | IoEvents::OUT) & mask
/// Returns the concrete `MemfdInode` behind the type-erased `memfd_inode` field.
///
/// # Panics
///
/// Panics if the stored inode is not a `MemfdInode`. `MemfdFile::new` only ever stores
/// a `MemfdInode`, so the downcast is expected to succeed.
fn memfd_inode(&self) -> &MemfdInode {
    self.memfd_inode.downcast_ref::<MemfdInode>().unwrap()
}
}
#[inherit_methods(from = "self.inode")]
impl Pollable for MemfdFile {
    /// Forwards the poll request, including the optional poller, to the underlying inode.
    fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents {
        self.memfd_inode.poll(mask, poller)
    }
}
#[inherit_methods(from = "self.memfd_inode")]
impl FileLike for MemfdFile {
fn read_at(&self, offset: usize, writer: &mut VmWriter) -> Result<usize>;
fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result<i32>;
@ -96,7 +188,7 @@ impl FileLike for MemfdFile {
let mut offset = self.offset.lock();
if self.status_flags().contains(StatusFlags::O_APPEND) {
*offset = self.inode.size();
*offset = self.memfd_inode.size();
}
let len = self.write_at(*offset, reader)?;
@ -108,14 +200,14 @@ impl FileLike for MemfdFile {
fn write_at(&self, mut offset: usize, reader: &mut VmReader) -> Result<usize> {
if self.status_flags().contains(StatusFlags::O_APPEND) {
// If the file has the O_APPEND flag, the offset is ignored
offset = self.inode.size();
offset = self.memfd_inode.size();
}
self.inode.write_at(offset, reader)
self.memfd_inode.write_at(offset, reader)
}
fn resize(&self, new_size: usize) -> Result<()> {
do_resize_util(&self.inode, self.status_flags(), new_size)
do_resize_util(&self.memfd_inode, self.status_flags(), new_size)
}
fn status_flags(&self) -> StatusFlags {
@ -134,14 +226,29 @@ impl FileLike for MemfdFile {
}
fn seek(&self, pos: SeekFrom) -> Result<usize> {
do_seek_util(&self.inode, &self.offset, pos)
do_seek_util(&self.memfd_inode, &self.offset, pos)
}
fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> {
do_fallocate_util(&self.inode, self.status_flags(), mode, offset, len)
do_fallocate_util(&self.memfd_inode, self.status_flags(), mode, offset, len)
}
fn mappable(&self) -> Result<Mappable> {
Ok(Mappable::Inode(self.inode.clone()))
Ok(Mappable::Inode(self.memfd_inode.clone()))
}
}
bitflags! {
    /// Flags that can be passed to `memfd_create`.
    pub struct MemfdFlags: u32 {
        /// Set the close-on-exec flag on the new file descriptor.
        const MFD_CLOEXEC = 0x0001;
        /// Permit sealing operations on the file.
        const MFD_ALLOW_SEALING = 0x0002;
        /// Create the file in hugetlbfs.
        const MFD_HUGETLB = 0x0004;
        /// Create the file non-executable and sealed so it cannot become executable.
        const MFD_NOEXEC_SEAL = 0x0008;
        /// Create the file executable.
        const MFD_EXEC = 0x0010;
    }
}