diff --git a/kernel/src/fs/ramfs/fs.rs b/kernel/src/fs/ramfs/fs.rs index 87ae7bcb9..4b43dc3be 100644 --- a/kernel/src/fs/ramfs/fs.rs +++ b/kernel/src/fs/ramfs/fs.rs @@ -33,7 +33,7 @@ use crate::{ prelude::*, process::{signal::PollHandle, Gid, Uid}, time::clocks::RealTimeCoarseClock, - vm::vmo::Vmo, + vm::{memfd::MemfdInode, vmo::Vmo}, }; /// A volatile file system whose data and metadata exists only in memory. @@ -93,7 +93,7 @@ impl FileSystem for RamFs { } /// An inode of `RamFs`. -struct RamInode { +pub struct RamInode { /// Inode inner specifics inner: Inner, /// Inode metadata @@ -147,6 +147,10 @@ impl Inner { Self::NamedPipe(NamedPipe::new().unwrap()) } + fn new_file_in_memfd(this: Weak) -> Self { + Self::File(PageCache::new(this).unwrap()) + } + fn as_direntry(&self) -> Option<&RwLock> { match self { Self::Dir(dir_entry) => Some(dir_entry), @@ -425,17 +429,22 @@ impl RamInode { }) } - fn new_file_detached(mode: InodeMode, uid: Uid, gid: Gid) -> Arc { - Arc::new_cyclic(|weak_self| RamInode { - inner: Inner::new_file(weak_self.clone()), + fn new_file_detached_in_memfd( + weak_self: &Weak, + mode: InodeMode, + uid: Uid, + gid: Gid, + ) -> Self { + Self { + inner: Inner::new_file_in_memfd(weak_self.clone()), metadata: SpinLock::new(InodeMeta::new(mode, uid, gid)), ino: weak_self.as_ptr() as u64, typ: InodeType::File, - this: weak_self.clone(), + this: Weak::new(), fs: Weak::new(), extension: Extension::new(), xattr: RamXattr::new(), - }) + } } fn new_symlink(fs: &Arc, mode: InodeMode, uid: Uid, gid: Gid) -> Arc { @@ -1243,11 +1252,16 @@ impl Inode for RamInode { } } -/// Creates a RAM inode that is detached from any `RamFs`. +/// Creates a RAM inode that is detached from any `RamFs`, and resides in a `MemfdInode`. /// // TODO: Add "anonymous inode fs" and link the inode to it. -pub fn new_detached_inode(mode: InodeMode, uid: Uid, gid: Gid) -> Arc { - RamInode::new_file_detached(mode, uid, gid) +pub fn new_detached_inode_in_memfd( + weak_self: &Weak, + mode: InodeMode, + uid: Uid, + gid: Gid, +) -> RamInode { + RamInode::new_file_detached_in_memfd(weak_self, mode, uid, gid) } fn write_lock_two_direntries_by_ino<'a>( diff --git a/kernel/src/fs/ramfs/mod.rs b/kernel/src/fs/ramfs/mod.rs index 8939ddc7e..b2e4fff0e 100644 --- a/kernel/src/fs/ramfs/mod.rs +++ b/kernel/src/fs/ramfs/mod.rs @@ -3,7 +3,7 @@ //! Ramfs based on PageCache use fs::RamFsType; -pub use fs::{new_detached_inode, RamFs}; +pub use fs::{new_detached_inode_in_memfd, RamFs, RamInode}; mod fs; mod xattr; diff --git a/kernel/src/syscall/memfd_create.rs b/kernel/src/syscall/memfd_create.rs index 0bce23e90..63fee3cc4 100644 --- a/kernel/src/syscall/memfd_create.rs +++ b/kernel/src/syscall/memfd_create.rs @@ -1,25 +1,12 @@ // SPDX-License-Identifier: MPL-2.0 -use bitflags::bitflags; - use super::SyscallReturn; use crate::{ fs::file_table::FdFlags, prelude::*, - vm::memfd::{MemfdFile, MAX_MEMFD_NAME_LEN}, + vm::memfd::{MemfdFile, MemfdFlags, MAX_MEMFD_NAME_LEN}, }; -bitflags! { - struct MemfdFlags: u32 { - /// Close on exec. - const MFD_CLOEXEC = 1 << 0; - /// Allow sealing operations on this file. - const MFD_ALLOW_SEALING = 1 << 1; - /// Create in the hugetlbfs. - const MFD_HUGETLB = 1 << 2; - } -} - pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result { // FIXME: When `name` is too long, `read_cstring` returns `EFAULT`. However, // according to , @@ -29,8 +16,6 @@ pub fn sys_memfd_create(name_addr: Vaddr, flags: u32, ctx: &Context) -> Result Result pub const MAX_MEMFD_NAME_LEN: usize = 249; -pub struct MemfdFile { - inode: Arc, +pub struct MemfdInode { + inode: RamInode, #[expect(dead_code)] name: String, +} + +#[inherit_methods(from = "self.inode")] +impl PageCacheBackend for MemfdInode { + fn read_page_async(&self, idx: usize, frame: &CachePage) -> Result; + fn write_page_async(&self, idx: usize, frame: &CachePage) -> Result; + fn npages(&self) -> usize; +} + +#[inherit_methods(from = "self.inode")] +impl Inode for MemfdInode { + fn metadata(&self) -> Metadata; + fn size(&self) -> usize; + fn atime(&self) -> Duration; + fn set_atime(&self, time: Duration); + fn mtime(&self) -> Duration; + fn set_mtime(&self, time: Duration); + fn ctime(&self) -> Duration; + fn set_ctime(&self, time: Duration); + fn ino(&self) -> u64; + fn type_(&self) -> InodeType; + fn mode(&self) -> Result; + fn owner(&self) -> Result; + fn set_owner(&self, uid: Uid) -> Result<()>; + fn group(&self) -> Result; + fn set_group(&self, gid: Gid) -> Result<()>; + fn page_cache(&self) -> Option>; + fn read_at(&self, offset: usize, writer: &mut VmWriter) -> Result; + fn read_direct_at(&self, offset: usize, writer: &mut VmWriter) -> Result; + fn write_direct_at(&self, offset: usize, reader: &mut VmReader) -> Result; + fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents; + fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result; + fn extension(&self) -> Option<&Extension>; + fn set_xattr( + &self, + name: XattrName, + value_reader: &mut VmReader, + flags: XattrSetFlags, + ) -> Result<()>; + fn get_xattr(&self, name: XattrName, value_writer: &mut VmWriter) -> Result; + fn list_xattr(&self, namespace: XattrNamespace, list_writer: &mut VmWriter) -> Result; + fn remove_xattr(&self, name: XattrName) -> Result<()>; + + fn write_at(&self, offset: usize, reader: &mut VmReader) -> Result { + self.inode.write_at(offset, reader) + } + + fn resize(&self, new_size: usize) -> Result<()> { + self.inode.resize(new_size) + } + + fn set_mode(&self, mode: InodeMode) -> Result<()> { + self.inode.set_mode(mode) + } + + fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> { + self.inode.fallocate(mode, offset, len) + } + + fn fs(&self) -> Arc { + // FIXME: Implement `AnonInodeFs` properly and link memfd inodes to it. + static ANON_INODE_FS: Once> = Once::new(); + ANON_INODE_FS.call_once(RamFs::new).clone() + } +} + +pub struct MemfdFile { + memfd_inode: Arc, offset: Mutex, access_mode: AccessMode, status_flags: AtomicU32, } impl MemfdFile { - pub fn new(name: &str) -> Result { + pub fn new(name: &str, memfd_flags: MemfdFlags) -> Result { if name.len() > MAX_MEMFD_NAME_LEN { return_errno_with_message!(Errno::EINVAL, "MemfdManager: `name` is too long."); } - // When Linux performs `memfd_create`, it first creates a RAM inode in a ramfs, - // then immediately unlinks it, and finally returns only the file descriptor. - // Therefore, when using `readlink("/proc//fd/", ...)` to get the file - // path of a `memfd` file, the result will have a `(deleted)` suffix. We stay - // consistent with Linux here. - // - // Reference: - let name = format!("/memfd:{} (deleted)", name); - let inode = new_detached_inode(mkmod!(a+rwx), Uid::new_root(), Gid::new_root()); + let name = format!("/memfd:{}", name); + + let (allow_sealing, executable) = if memfd_flags.contains(MemfdFlags::MFD_NOEXEC_SEAL) { + (true, false) + } else { + (memfd_flags.contains(MemfdFlags::MFD_ALLOW_SEALING), true) + }; + + let mode = if executable { + mkmod!(a+rwx) + } else { + mkmod!(a+rw) + }; + + let memfd_inode = Arc::new_cyclic(|weak_self| { + let ram_inode = + new_detached_inode_in_memfd(weak_self, mode, Uid::new_root(), Gid::new_root()); + + MemfdInode { + inode: ram_inode, + name, + } + }); Ok(Self { - inode, - name, + memfd_inode, offset: Mutex::new(0), access_mode: AccessMode::O_RDWR, status_flags: AtomicU32::new(0), }) } -} -impl Pollable for MemfdFile { - fn poll(&self, mask: IoEvents, _poller: Option<&mut PollHandle>) -> IoEvents { - (IoEvents::IN | IoEvents::OUT) & mask + fn memfd_inode(&self) -> &MemfdInode { + self.memfd_inode.downcast_ref::().unwrap() } } -#[inherit_methods(from = "self.inode")] +impl Pollable for MemfdFile { + fn poll(&self, mask: IoEvents, poller: Option<&mut PollHandle>) -> IoEvents { + self.memfd_inode.poll(mask, poller) + } +} + +#[inherit_methods(from = "self.memfd_inode")] impl FileLike for MemfdFile { fn read_at(&self, offset: usize, writer: &mut VmWriter) -> Result; fn ioctl(&self, cmd: IoctlCmd, arg: usize) -> Result; @@ -96,7 +188,7 @@ impl FileLike for MemfdFile { let mut offset = self.offset.lock(); if self.status_flags().contains(StatusFlags::O_APPEND) { - *offset = self.inode.size(); + *offset = self.memfd_inode.size(); } let len = self.write_at(*offset, reader)?; @@ -108,14 +200,14 @@ impl FileLike for MemfdFile { fn write_at(&self, mut offset: usize, reader: &mut VmReader) -> Result { if self.status_flags().contains(StatusFlags::O_APPEND) { // If the file has the O_APPEND flag, the offset is ignored - offset = self.inode.size(); + offset = self.memfd_inode.size(); } - self.inode.write_at(offset, reader) + self.memfd_inode.write_at(offset, reader) } fn resize(&self, new_size: usize) -> Result<()> { - do_resize_util(&self.inode, self.status_flags(), new_size) + do_resize_util(&self.memfd_inode, self.status_flags(), new_size) } fn status_flags(&self) -> StatusFlags { @@ -134,14 +226,29 @@ impl FileLike for MemfdFile { } fn seek(&self, pos: SeekFrom) -> Result { - do_seek_util(&self.inode, &self.offset, pos) + do_seek_util(&self.memfd_inode, &self.offset, pos) } fn fallocate(&self, mode: FallocMode, offset: usize, len: usize) -> Result<()> { - do_fallocate_util(&self.inode, self.status_flags(), mode, offset, len) + do_fallocate_util(&self.memfd_inode, self.status_flags(), mode, offset, len) } fn mappable(&self) -> Result { - Ok(Mappable::Inode(self.inode.clone())) + Ok(Mappable::Inode(self.memfd_inode.clone())) + } +} + +bitflags! { + pub struct MemfdFlags: u32 { + /// Close on exec. + const MFD_CLOEXEC = 1 << 0; + /// Allow sealing operations on this file. + const MFD_ALLOW_SEALING = 1 << 1; + /// Create in the hugetlbfs. + const MFD_HUGETLB = 1 << 2; + /// Not executable and sealed to prevent changing to executable. + const MFD_NOEXEC_SEAL = 1 << 3; + /// Executable. + const MFD_EXEC = 1 << 4; } }