Support nsfs
This commit is contained in:
parent
78f098fe88
commit
9c023db7b5
|
|
@ -5,6 +5,7 @@ use spin::Once;
|
||||||
use crate::{
|
use crate::{
|
||||||
fs::{
|
fs::{
|
||||||
path::{Mount, Path, PathResolver},
|
path::{Mount, Path, PathResolver},
|
||||||
|
pseudofs::{NsCommonOps, NsFs, NsType},
|
||||||
ramfs::RamFs,
|
ramfs::RamFs,
|
||||||
},
|
},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
|
|
@ -22,6 +23,8 @@ pub struct MountNamespace {
|
||||||
root: Arc<Mount>,
|
root: Arc<Mount>,
|
||||||
/// The user namespace that owns this mount namespace.
|
/// The user namespace that owns this mount namespace.
|
||||||
owner: Arc<UserNamespace>,
|
owner: Arc<UserNamespace>,
|
||||||
|
/// The path in nsfs.
|
||||||
|
path: Path,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MountNamespace {
|
impl MountNamespace {
|
||||||
|
|
@ -36,7 +39,8 @@ impl MountNamespace {
|
||||||
|
|
||||||
Arc::new_cyclic(|weak_self| {
|
Arc::new_cyclic(|weak_self| {
|
||||||
let root = Mount::new_root(rootfs, weak_self.clone());
|
let root = Mount::new_root(rootfs, weak_self.clone());
|
||||||
MountNamespace { root, owner }
|
let path = NsFs::new_path(weak_self.clone());
|
||||||
|
MountNamespace { root, owner, path }
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -73,10 +77,11 @@ impl MountNamespace {
|
||||||
let new_mnt_ns = Arc::new_cyclic(|weak_self| {
|
let new_mnt_ns = Arc::new_cyclic(|weak_self| {
|
||||||
let new_root =
|
let new_root =
|
||||||
root_mount.clone_mount_tree(root_mount.root_dentry(), Some(weak_self), true);
|
root_mount.clone_mount_tree(root_mount.root_dentry(), Some(weak_self), true);
|
||||||
|
let path = NsFs::new_path(weak_self.clone());
|
||||||
MountNamespace {
|
MountNamespace {
|
||||||
root: new_root,
|
root: new_root,
|
||||||
owner,
|
owner,
|
||||||
|
path,
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -134,3 +139,26 @@ impl Drop for MountNamespace {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl NsCommonOps for MountNamespace {
|
||||||
|
const TYPE: NsType = NsType::Mnt;
|
||||||
|
|
||||||
|
fn get_owner_user_ns(&self) -> Result<&Arc<UserNamespace>> {
|
||||||
|
Ok(&self.owner)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_parent(&self) -> Result<Arc<Self>> {
|
||||||
|
return_errno_with_message!(
|
||||||
|
Errno::EINVAL,
|
||||||
|
"mnt namespace does not have parent namespace"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path(&self) -> &Path {
|
||||||
|
&self.path
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ use crate::{
|
||||||
cgroup::CgroupFileOps, cmdline::CmdlineFileOps, comm::CommFileOps,
|
cgroup::CgroupFileOps, cmdline::CmdlineFileOps, comm::CommFileOps,
|
||||||
environ::EnvironFileOps, exe::ExeSymOps, fd::FdDirOps, gid_map::GidMapFileOps,
|
environ::EnvironFileOps, exe::ExeSymOps, fd::FdDirOps, gid_map::GidMapFileOps,
|
||||||
maps::MapsFileOps, mem::MemFileOps, mountinfo::MountInfoFileOps,
|
maps::MapsFileOps, mem::MemFileOps, mountinfo::MountInfoFileOps,
|
||||||
mounts::MountsFileOps, oom_score_adj::OomScoreAdjFileOps, stat::StatFileOps,
|
mounts::MountsFileOps, ns::NsDirOps, oom_score_adj::OomScoreAdjFileOps,
|
||||||
status::StatusFileOps, uid_map::UidMapFileOps,
|
stat::StatFileOps, status::StatusFileOps, uid_map::UidMapFileOps,
|
||||||
},
|
},
|
||||||
template::{
|
template::{
|
||||||
DirOps, ProcDir, ProcDirBuilder, lookup_child_from_table,
|
DirOps, ProcDir, ProcDirBuilder, lookup_child_from_table,
|
||||||
|
|
@ -38,6 +38,7 @@ mod maps;
|
||||||
mod mem;
|
mod mem;
|
||||||
mod mountinfo;
|
mod mountinfo;
|
||||||
mod mounts;
|
mod mounts;
|
||||||
|
mod ns;
|
||||||
mod oom_score_adj;
|
mod oom_score_adj;
|
||||||
mod stat;
|
mod stat;
|
||||||
mod status;
|
mod status;
|
||||||
|
|
@ -111,6 +112,7 @@ impl TidDirOps {
|
||||||
("gid_map", GidMapFileOps::new_inode),
|
("gid_map", GidMapFileOps::new_inode),
|
||||||
("mem", MemFileOps::new_inode),
|
("mem", MemFileOps::new_inode),
|
||||||
("mountinfo", MountInfoFileOps::new_inode),
|
("mountinfo", MountInfoFileOps::new_inode),
|
||||||
|
("ns", NsDirOps::new_inode),
|
||||||
("oom_score_adj", OomScoreAdjFileOps::new_inode),
|
("oom_score_adj", OomScoreAdjFileOps::new_inode),
|
||||||
("stat", StatFileOps::new_inode),
|
("stat", StatFileOps::new_inode),
|
||||||
("status", StatusFileOps::new_inode),
|
("status", StatusFileOps::new_inode),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,180 @@
|
||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
use core::marker::PhantomData;
|
||||||
|
|
||||||
|
use aster_util::slot_vec::SlotVec;
|
||||||
|
use ostd::sync::RwMutexUpgradeableGuard;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
fs::{
|
||||||
|
path::{MountNamespace, Path},
|
||||||
|
procfs::{
|
||||||
|
DirOps, ProcDir, ProcDirBuilder, ProcSymBuilder, SymOps, pid::TidDirOps,
|
||||||
|
template::ProcSym,
|
||||||
|
},
|
||||||
|
pseudofs::NsCommonOps,
|
||||||
|
utils::{DirEntryVecExt, Inode, SymbolicLink, mkmod},
|
||||||
|
},
|
||||||
|
net::uts_ns::UtsNamespace,
|
||||||
|
prelude::*,
|
||||||
|
process::{NsProxy, UserNamespace, posix_thread::AsPosixThread},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Represents the inode at `/proc/[pid]/task/[tid]/ns` (and also `/proc/[pid]/ns`).
|
||||||
|
pub(super) struct NsDirOps {
|
||||||
|
dir: TidDirOps,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NsDirOps {
|
||||||
|
/// Creates a new directory inode for the `ns` directory.
|
||||||
|
pub fn new_inode(dir: &TidDirOps, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
|
||||||
|
ProcDirBuilder::new(
|
||||||
|
Self { dir: dir.clone() },
|
||||||
|
// Reference: <https://elixir.bootlin.com/linux/v6.18/source/fs/proc/base.c#L3321>
|
||||||
|
mkmod!(u + r, a + x),
|
||||||
|
)
|
||||||
|
.parent(parent)
|
||||||
|
.build()
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NsDirOps {
|
||||||
|
/// The set of namespace entries that depend on the thread's [`NsProxy`].
|
||||||
|
#[expect(clippy::type_complexity)]
|
||||||
|
const NS_PROXY_ENTRIES: &[(&str, fn(&NsProxy, Weak<dyn Inode>) -> Arc<dyn Inode>)] = &[
|
||||||
|
("uts", |proxy, parent| {
|
||||||
|
NsSymOps::new_inode(proxy.uts_ns(), parent)
|
||||||
|
}),
|
||||||
|
("mnt", |proxy, parent| {
|
||||||
|
NsSymOps::new_inode(proxy.mnt_ns(), parent)
|
||||||
|
}),
|
||||||
|
];
|
||||||
|
|
||||||
|
/// Looks up a namespace symlink backed by the thread's [`NsProxy`].
|
||||||
|
fn lookup_ns_proxy_child(
|
||||||
|
&self,
|
||||||
|
name: &str,
|
||||||
|
parent: Weak<dyn Inode>,
|
||||||
|
) -> Option<Result<Arc<dyn Inode>>> {
|
||||||
|
let constructor = Self::NS_PROXY_ENTRIES
|
||||||
|
.iter()
|
||||||
|
.find(|(entry_name, _)| *entry_name == name)
|
||||||
|
.map(|(_, ctor)| ctor)?;
|
||||||
|
|
||||||
|
let thread = self.dir.thread();
|
||||||
|
let ns_proxy = thread.as_posix_thread().unwrap().ns_proxy().lock();
|
||||||
|
let Some(ns_proxy) = ns_proxy.as_ref() else {
|
||||||
|
return Some(Err(Error::with_message(
|
||||||
|
Errno::ENOENT,
|
||||||
|
"the thread's namespace proxy no longer exists",
|
||||||
|
)));
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(Ok(constructor(ns_proxy, parent)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DirOps for NsDirOps {
|
||||||
|
fn lookup_child(&self, dir: &ProcDir<Self>, name: &str) -> Result<Arc<dyn Inode>> {
|
||||||
|
let inode = if let Some(result) = self.lookup_ns_proxy_child(name, dir.this_weak().clone())
|
||||||
|
{
|
||||||
|
result?
|
||||||
|
} else if name == "user" {
|
||||||
|
let user_ns = self.dir.process_ref.user_ns().lock();
|
||||||
|
NsSymOps::new_inode(&*user_ns, dir.this_weak().clone())
|
||||||
|
} else {
|
||||||
|
return_errno_with_message!(Errno::ENOENT, "the file does not exist");
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut cached_children = dir.cached_children().write();
|
||||||
|
cached_children.remove_entry_by_name(name);
|
||||||
|
cached_children.put((name.to_string(), inode.clone()));
|
||||||
|
Ok(inode)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn populate_children<'a>(
|
||||||
|
&self,
|
||||||
|
dir: &'a ProcDir<Self>,
|
||||||
|
) -> RwMutexUpgradeableGuard<'a, SlotVec<(String, Arc<dyn Inode>)>> {
|
||||||
|
let mut cached_children = dir.cached_children().write();
|
||||||
|
|
||||||
|
let thread = self.dir.thread();
|
||||||
|
let ns_proxy = thread.as_posix_thread().unwrap().ns_proxy().lock();
|
||||||
|
|
||||||
|
// Refresh NsProxy-backed entries: remove stale ones and re-add if the proxy is alive.
|
||||||
|
for &(name, constructor) in Self::NS_PROXY_ENTRIES {
|
||||||
|
cached_children.remove_entry_by_name(name);
|
||||||
|
if let Some(ns_proxy) = ns_proxy.as_ref() {
|
||||||
|
let inode = constructor(ns_proxy, dir.this_weak().clone());
|
||||||
|
cached_children.put((name.to_string(), inode));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The user namespace never changes, so only insert if absent.
|
||||||
|
cached_children.put_entry_if_not_found("user", || {
|
||||||
|
let user_ns = self.dir.process_ref.user_ns().lock();
|
||||||
|
NsSymOps::new_inode(&*user_ns, dir.this_weak().clone())
|
||||||
|
});
|
||||||
|
|
||||||
|
cached_children.downgrade()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_child(&self, child: &dyn Inode) -> bool {
|
||||||
|
// The user namespace of a thread/process never changes,
|
||||||
|
// so a user-ns symlink is always valid.
|
||||||
|
if child.downcast_ref::<NsSymlink<UserNamespace>>().is_some() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks whether `child` still matches the corresponding namespace
|
||||||
|
// in the thread's current `NsProxy`.
|
||||||
|
|
||||||
|
let thread = self.dir.thread();
|
||||||
|
let ns_proxy = thread.as_posix_thread().unwrap().ns_proxy().lock();
|
||||||
|
let Some(ns_proxy) = ns_proxy.as_ref() else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(sym) = child.downcast_ref::<NsSymlink<UtsNamespace>>() {
|
||||||
|
return &sym.inner().ns_path == ns_proxy.uts_ns().path();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(sym) = child.downcast_ref::<NsSymlink<MountNamespace>>() {
|
||||||
|
return &sym.inner().ns_path == ns_proxy.mnt_ns().path();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Support additional namespace types.
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type NsSymlink<T> = ProcSym<NsSymOps<T>>;
|
||||||
|
|
||||||
|
/// Represents the inode at `/proc/[pid]/task/[tid]/ns/<type>` (and also `/proc/[pid]/ns/<type>`).
|
||||||
|
pub struct NsSymOps<T: NsCommonOps> {
|
||||||
|
ns_path: Path,
|
||||||
|
phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> NsSymOps<T> {
|
||||||
|
/// Creates a new symlink inode pointing to the given namespace.
|
||||||
|
fn new_inode(ns: &Arc<T>, parent: Weak<dyn Inode>) -> Arc<dyn Inode> {
|
||||||
|
ProcSymBuilder::new(
|
||||||
|
Self {
|
||||||
|
ns_path: ns.path().clone(),
|
||||||
|
phantom: PhantomData,
|
||||||
|
},
|
||||||
|
mkmod!(a + rwx),
|
||||||
|
)
|
||||||
|
.parent(parent)
|
||||||
|
.build()
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> SymOps for NsSymOps<T> {
|
||||||
|
fn read_link(&self) -> Result<SymbolicLink> {
|
||||||
|
Ok(SymbolicLink::Path(self.ns_path.clone()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ use core::{
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use anon_inode_fs::AnonInodeFs;
|
pub use anon_inode_fs::AnonInodeFs;
|
||||||
|
pub use nsfs::{NsCommonOps, NsFile, NsFs, NsType};
|
||||||
pub use pidfdfs::PidfdFs;
|
pub use pidfdfs::PidfdFs;
|
||||||
pub(super) use pipefs::PipeFs;
|
pub(super) use pipefs::PipeFs;
|
||||||
use pipefs::PipeFsType;
|
use pipefs::PipeFsType;
|
||||||
|
|
@ -28,6 +29,7 @@ use crate::{
|
||||||
};
|
};
|
||||||
|
|
||||||
mod anon_inode_fs;
|
mod anon_inode_fs;
|
||||||
|
mod nsfs;
|
||||||
mod pidfdfs;
|
mod pidfdfs;
|
||||||
mod pipefs;
|
mod pipefs;
|
||||||
mod sockfs;
|
mod sockfs;
|
||||||
|
|
@ -122,6 +124,7 @@ pub enum PseudoInodeType {
|
||||||
Socket,
|
Socket,
|
||||||
AnonInode,
|
AnonInode,
|
||||||
Pidfd,
|
Pidfd,
|
||||||
|
Ns,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<PseudoInodeType> for InodeType {
|
impl From<PseudoInodeType> for InodeType {
|
||||||
|
|
@ -132,6 +135,7 @@ impl From<PseudoInodeType> for InodeType {
|
||||||
PseudoInodeType::Socket => InodeType::Socket,
|
PseudoInodeType::Socket => InodeType::Socket,
|
||||||
PseudoInodeType::AnonInode => InodeType::Unknown,
|
PseudoInodeType::AnonInode => InodeType::Unknown,
|
||||||
PseudoInodeType::Pidfd => InodeType::Unknown,
|
PseudoInodeType::Pidfd => InodeType::Unknown,
|
||||||
|
PseudoInodeType::Ns => InodeType::File,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,365 @@
|
||||||
|
// SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
use alloc::format;
|
||||||
|
use core::time::Duration;
|
||||||
|
|
||||||
|
use inherit_methods_macro::inherit_methods;
|
||||||
|
use ostd::task::Task;
|
||||||
|
use spin::Once;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
events::IoEvents,
|
||||||
|
fs::{
|
||||||
|
file_table::{FdFlags, FileDesc},
|
||||||
|
inode_handle::{FileIo, InodeHandle},
|
||||||
|
path::{Mount, Path},
|
||||||
|
pseudofs::{PseudoFs, PseudoInode, PseudoInodeType},
|
||||||
|
utils::{
|
||||||
|
AccessMode, Extension, FileSystem, Inode, InodeIo, InodeMode, InodeType, Metadata,
|
||||||
|
StatusFlags, mkmod,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
prelude::*,
|
||||||
|
process::{
|
||||||
|
CloneFlags, Gid, Uid, UserNamespace,
|
||||||
|
signal::{PollHandle, Pollable},
|
||||||
|
},
|
||||||
|
util::ioctl::{RawIoctl, dispatch_ioctl},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// A pseudo filesystem for namespace files.
|
||||||
|
pub struct NsFs {
|
||||||
|
_private: (),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NsFs {
|
||||||
|
/// Returns the singleton instance of the ns filesystem.
|
||||||
|
pub fn singleton() -> &'static Arc<PseudoFs> {
|
||||||
|
static NSFS: Once<Arc<PseudoFs>> = Once::new();
|
||||||
|
PseudoFs::singleton(&NSFS, "nsfs", NSFS_MAGIC)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a pseudo [`Path`] for a namespace file.
|
||||||
|
pub fn new_path<T: NsCommonOps>(ns: Weak<T>) -> Path {
|
||||||
|
let ns_inode = {
|
||||||
|
let ino = Self::singleton().alloc_id();
|
||||||
|
let fs = Arc::downgrade(Self::singleton());
|
||||||
|
Arc::new(NsInode::new(ino, Uid::new_root(), Gid::new_root(), ns, fs))
|
||||||
|
};
|
||||||
|
|
||||||
|
Path::new_pseudo(Self::mount_node().clone(), ns_inode, |inode| {
|
||||||
|
let inode = inode.downcast_ref::<NsInode<T>>().unwrap();
|
||||||
|
inode.name().to_string()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the pseudo mount node of the ns filesystem.
|
||||||
|
pub fn mount_node() -> &'static Arc<Mount> {
|
||||||
|
static NSFS_MOUNT: Once<Arc<Mount>> = Once::new();
|
||||||
|
NSFS_MOUNT.call_once(|| Mount::new_pseudo(Self::singleton().clone()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An inode representing a namespace entry in [`NsFs`].
|
||||||
|
struct NsInode<T: NsCommonOps> {
|
||||||
|
common: PseudoInode,
|
||||||
|
ns: Weak<T>,
|
||||||
|
name: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> NsInode<T> {
|
||||||
|
fn new(ino: u64, uid: Uid, gid: Gid, ns: Weak<T>, fs: Weak<PseudoFs>) -> Self {
|
||||||
|
let mode = mkmod!(a + r);
|
||||||
|
let common = PseudoInode::new(ino, PseudoInodeType::Ns, mode, uid, gid, fs);
|
||||||
|
let name = format!("{}:[{}]", T::NAME, ino);
|
||||||
|
|
||||||
|
Self { common, ns, name }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
&self.name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inherit_methods(from = "self.common")]
|
||||||
|
impl<T: NsCommonOps> Inode for NsInode<T> {
|
||||||
|
fn size(&self) -> usize;
|
||||||
|
fn resize(&self, _new_size: usize) -> Result<()>;
|
||||||
|
fn metadata(&self) -> Metadata;
|
||||||
|
fn extension(&self) -> &Extension;
|
||||||
|
fn ino(&self) -> u64;
|
||||||
|
fn type_(&self) -> InodeType;
|
||||||
|
fn mode(&self) -> Result<InodeMode>;
|
||||||
|
fn set_mode(&self, mode: InodeMode) -> Result<()>;
|
||||||
|
fn owner(&self) -> Result<Uid>;
|
||||||
|
fn set_owner(&self, uid: Uid) -> Result<()>;
|
||||||
|
fn group(&self) -> Result<Gid>;
|
||||||
|
fn set_group(&self, gid: Gid) -> Result<()>;
|
||||||
|
fn atime(&self) -> Duration;
|
||||||
|
fn set_atime(&self, time: Duration);
|
||||||
|
fn mtime(&self) -> Duration;
|
||||||
|
fn set_mtime(&self, time: Duration);
|
||||||
|
fn ctime(&self) -> Duration;
|
||||||
|
fn set_ctime(&self, time: Duration);
|
||||||
|
fn fs(&self) -> Arc<dyn FileSystem>;
|
||||||
|
|
||||||
|
fn open(
|
||||||
|
&self,
|
||||||
|
access_mode: AccessMode,
|
||||||
|
_status_flags: StatusFlags,
|
||||||
|
) -> Option<Result<Box<dyn FileIo>>> {
|
||||||
|
// FIXME: This may not be the most appropriate place to check the access mode,
|
||||||
|
// but the check must not be bypassed even if the current process has the
|
||||||
|
// CAP_DAC_OVERRIDE capability. It is hard to find a better place for it,
|
||||||
|
// and an extra check here does no harm.
|
||||||
|
if access_mode.is_writable() {
|
||||||
|
return Some(Err(Error::with_message(
|
||||||
|
Errno::EPERM,
|
||||||
|
"ns files cannot be opened as writable",
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let ns = self
|
||||||
|
.ns
|
||||||
|
.upgrade()
|
||||||
|
.ok_or_else(|| Error::with_message(Errno::EPERM, "the namespace no longer exists"));
|
||||||
|
|
||||||
|
Some(ns.map(|ns| Box::new(NsFile { ns }) as Box<dyn FileIo>))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inherit_methods(from = "self.common")]
|
||||||
|
impl<T: NsCommonOps> InodeIo for NsInode<T> {
|
||||||
|
fn read_at(
|
||||||
|
&self,
|
||||||
|
_offset: usize,
|
||||||
|
_writer: &mut VmWriter,
|
||||||
|
_status: StatusFlags,
|
||||||
|
) -> Result<usize>;
|
||||||
|
fn write_at(
|
||||||
|
&self,
|
||||||
|
_offset: usize,
|
||||||
|
_reader: &mut VmReader,
|
||||||
|
_status: StatusFlags,
|
||||||
|
) -> Result<usize>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A file handle referencing a live namespace.
|
||||||
|
pub struct NsFile<T: NsCommonOps> {
|
||||||
|
ns: Arc<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> NsFile<T> {
|
||||||
|
/// Returns a reference to the underlying namespace.
|
||||||
|
pub fn ns(&self) -> &Arc<T> {
|
||||||
|
&self.ns
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> FileIo for NsFile<T> {
|
||||||
|
fn check_seekable(&self) -> Result<()> {
|
||||||
|
return_errno_with_message!(Errno::ESPIPE, "ns files is not seekable");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_offset_aware(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ioctl(&self, raw_ioctl: RawIoctl) -> Result<i32> {
|
||||||
|
use ioctl_defs::*;
|
||||||
|
dispatch_ioctl!(match raw_ioctl {
|
||||||
|
_cmd @ GetUserNs => {
|
||||||
|
let user_ns = self.ns.get_owner_user_ns()?;
|
||||||
|
|
||||||
|
let current = current!();
|
||||||
|
let current_user_ns = current.user_ns().lock();
|
||||||
|
if !current_user_ns.is_same_or_ancestor_of(user_ns) {
|
||||||
|
return_errno_with_message!(
|
||||||
|
Errno::EPERM,
|
||||||
|
"the owner user namespace is not an ancestor of the current namespace"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
open_ns_as_file(user_ns.as_ref())
|
||||||
|
}
|
||||||
|
_cmd @ GetParent => {
|
||||||
|
let parent = self.ns.get_parent()?;
|
||||||
|
open_ns_as_file(parent.as_ref())
|
||||||
|
}
|
||||||
|
_cmd @ GetType => {
|
||||||
|
let clone_flags = CloneFlags::from(T::TYPE);
|
||||||
|
Ok(clone_flags.bits().cast_signed())
|
||||||
|
}
|
||||||
|
cmd @ GetOwnerUid => {
|
||||||
|
let user_ns = self
|
||||||
|
.ns
|
||||||
|
.as_any()
|
||||||
|
.downcast_ref::<UserNamespace>()
|
||||||
|
.ok_or_else(|| {
|
||||||
|
Error::with_message(
|
||||||
|
Errno::EINVAL,
|
||||||
|
"the ns file does not correspond to a user namespace",
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
let uid = user_ns.get_owner_uid()?;
|
||||||
|
cmd.write(&uid.into())?;
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
// TODO: Support additional iotcl commands
|
||||||
|
_ => return_errno_with_message!(Errno::ENOTTY, "unsupported ioctl command"),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> Pollable for NsFile<T> {
|
||||||
|
fn poll(&self, mask: IoEvents, _poller: Option<&mut PollHandle>) -> IoEvents {
|
||||||
|
(IoEvents::IN | IoEvents::OUT | IoEvents::RDNORM) & mask
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: NsCommonOps> InodeIo for NsFile<T> {
|
||||||
|
fn read_at(
|
||||||
|
&self,
|
||||||
|
_offset: usize,
|
||||||
|
_writer: &mut VmWriter,
|
||||||
|
_status_flags: StatusFlags,
|
||||||
|
) -> Result<usize> {
|
||||||
|
return_errno_with_message!(Errno::EINVAL, "ns files do not support read_at");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_at(
|
||||||
|
&self,
|
||||||
|
_offset: usize,
|
||||||
|
_reader: &mut VmReader,
|
||||||
|
_status_flags: StatusFlags,
|
||||||
|
) -> Result<usize> {
|
||||||
|
return_errno_with_message!(Errno::EINVAL, "ns files do not support write_at");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Opens a namespace as a file and returns the file descriptor.
|
||||||
|
fn open_ns_as_file<T: NsCommonOps>(ns: &T) -> Result<FileDesc> {
|
||||||
|
let path = ns.path();
|
||||||
|
let inode_handle = InodeHandle::new(path.clone(), AccessMode::O_RDONLY, StatusFlags::empty())?;
|
||||||
|
|
||||||
|
let current_task = Task::current().unwrap();
|
||||||
|
let thread_local = current_task.as_thread_local().unwrap();
|
||||||
|
let mut file_table_ref = thread_local.borrow_file_table_mut();
|
||||||
|
let mut file_table = file_table_ref.unwrap().write();
|
||||||
|
let fd = file_table.insert(Arc::new(inode_handle), FdFlags::CLOEXEC);
|
||||||
|
|
||||||
|
Ok(fd)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Common operations shared by all namespace types.
|
||||||
|
///
|
||||||
|
/// Implementors represent a specific kind of namespace (e.g., UTS, mount, user)
|
||||||
|
/// and must provide the associated metadata and traversal methods required by
|
||||||
|
/// [`NsFs`] and [`NsFile`].
|
||||||
|
pub trait NsCommonOps: Any + Send + Sync + 'static {
|
||||||
|
/// The human-readable name of this namespace kind (derived from [`Self::TYPE`]).
|
||||||
|
const NAME: &str = Self::TYPE.as_str();
|
||||||
|
|
||||||
|
/// The [`NsType`] discriminant for this namespace kind.
|
||||||
|
const TYPE: NsType;
|
||||||
|
|
||||||
|
/// Returns the owner user namespace.
|
||||||
|
fn get_owner_user_ns(&self) -> Result<&Arc<UserNamespace>>;
|
||||||
|
|
||||||
|
/// Returns the parent namespace, if one exists.
|
||||||
|
fn get_parent(&self) -> Result<Arc<Self>>;
|
||||||
|
|
||||||
|
/// Returns the pseudo filesystem [`Path`] associated with this namespace.
|
||||||
|
fn path(&self) -> &Path;
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kinds of namespaces that can be exposed through nsfs.
///
/// `Eq` is derived alongside `PartialEq` because the enum has no fields, so
/// equality is total.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NsType {
    Uts,
    User,
    Mnt,
    #[expect(unused)]
    Pid,
    #[expect(unused)]
    Time,
    #[expect(unused)]
    Cgroup,
    #[expect(unused)]
    Ipc,
    #[expect(unused)]
    Net,
}

impl NsType {
    /// Returns the lowercase name of the namespace kind.
    ///
    /// This is a `const fn` so that it can be evaluated in constant contexts.
    const fn as_str(&self) -> &'static str {
        match self {
            NsType::Uts => "uts",
            NsType::User => "user",
            NsType::Mnt => "mnt",
            NsType::Pid => "pid",
            NsType::Time => "time",
            NsType::Cgroup => "cgroup",
            NsType::Ipc => "ipc",
            NsType::Net => "net",
        }
    }
}
|
||||||
|
|
||||||
|
impl From<NsType> for CloneFlags {
|
||||||
|
fn from(value: NsType) -> Self {
|
||||||
|
match value {
|
||||||
|
NsType::Uts => CloneFlags::CLONE_NEWUTS,
|
||||||
|
NsType::User => CloneFlags::CLONE_NEWUSER,
|
||||||
|
NsType::Mnt => CloneFlags::CLONE_NEWNS,
|
||||||
|
NsType::Pid => CloneFlags::CLONE_NEWPID,
|
||||||
|
NsType::Time => CloneFlags::CLONE_NEWTIME,
|
||||||
|
NsType::Cgroup => CloneFlags::CLONE_NEWCGROUP,
|
||||||
|
NsType::Ipc => CloneFlags::CLONE_NEWIPC,
|
||||||
|
NsType::Net => CloneFlags::CLONE_NEWNET,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The magic number of nsfs; its four bytes spell "nsfs" in ASCII.
//
// Reference: <https://elixir.bootlin.com/linux/v6.16.5/source/include/uapi/linux/magic.h#L95>
const NSFS_MAGIC: u64 = 0x6e73_6673;
|
||||||
|
|
||||||
|
mod ioctl_defs {
|
||||||
|
use crate::util::ioctl::{InData, NoData, OutData, ioc};
|
||||||
|
|
||||||
|
// Legacy encoding ioctl commands
|
||||||
|
|
||||||
|
/// Returns a file descriptor of the owner user namespace.
|
||||||
|
pub type GetUserNs = ioc!(NS_GET_USERNS, 0xb701, NoData);
|
||||||
|
/// Returns a file descriptor of the parent namespace.
|
||||||
|
pub type GetParent = ioc!(NS_GET_PARENT, 0xb702, NoData);
|
||||||
|
/// Gets the type of the namespace (e.g., user, pid, mnt, etc.).
|
||||||
|
pub type GetType = ioc!(NS_GET_NSTYPE, 0xb703, NoData);
|
||||||
|
/// Gets the user ID of the namespace owner.
|
||||||
|
///
|
||||||
|
/// Only user namespace supports this operation.
|
||||||
|
pub type GetOwnerUid = ioc!(NS_GET_OWNER_ID, 0xb704, OutData<u32>);
|
||||||
|
|
||||||
|
// Modern encoding ioctl commands
|
||||||
|
|
||||||
|
#[expect(unused)]
|
||||||
|
/// Gets the ID of the mount namespace.
|
||||||
|
pub type GetMntNsId = ioc!(NS_GET_MNTNS_ID, 0xb7, 0x5, OutData<u64>);
|
||||||
|
/// Translates thread ID from the target PID namespace into the caller's PID namespace.
|
||||||
|
#[expect(unused)]
|
||||||
|
pub type GetTidFromPidNs = ioc!(NS_GET_PID_FROM_PIDNS, 0xb7, 0x6, InData<i32>);
|
||||||
|
/// Translates process ID from the target PID namespace into the caller's PID namespace.
|
||||||
|
#[expect(unused)]
|
||||||
|
pub type GetPidFromPidNs = ioc!(NS_GET_TGID_FROM_PIDNS, 0xb7, 0x7, InData<i32>);
|
||||||
|
/// Translates thread ID from the caller's PID namespace into the target PID namespace.
|
||||||
|
#[expect(unused)]
|
||||||
|
pub type GetTidInPidNs = ioc!(NS_GET_PID_IN_PIDNS, 0xb7, 0x8, InData<i32>);
|
||||||
|
/// Translates process ID from the caller's PID namespace into the target PID namespace.
|
||||||
|
#[expect(unused)]
|
||||||
|
pub type GetPidInPidNs = ioc!(NS_GET_TGID_IN_PIDNS, 0xb7, 0x9, InData<i32>);
|
||||||
|
}
|
||||||
|
|
@ -4,6 +4,10 @@ use ostd::{const_assert, sync::RwMutexReadGuard};
|
||||||
use spin::Once;
|
use spin::Once;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
fs::{
|
||||||
|
path::Path,
|
||||||
|
pseudofs::{NsCommonOps, NsFs, NsType},
|
||||||
|
},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
process::{UserNamespace, credentials::capabilities::CapSet, posix_thread::PosixThread},
|
process::{UserNamespace, credentials::capabilities::CapSet, posix_thread::PosixThread},
|
||||||
util::padded,
|
util::padded,
|
||||||
|
|
@ -13,6 +17,7 @@ use crate::{
|
||||||
pub struct UtsNamespace {
|
pub struct UtsNamespace {
|
||||||
uts_name: RwMutex<UtsName>,
|
uts_name: RwMutex<UtsName>,
|
||||||
owner: Arc<UserNamespace>,
|
owner: Arc<UserNamespace>,
|
||||||
|
path: Path,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UtsNamespace {
|
impl UtsNamespace {
|
||||||
|
|
@ -33,11 +38,18 @@ impl UtsNamespace {
|
||||||
};
|
};
|
||||||
|
|
||||||
let owner = UserNamespace::get_init_singleton().clone();
|
let owner = UserNamespace::get_init_singleton().clone();
|
||||||
|
Self::new(uts_name, owner)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
Arc::new(Self {
|
fn new(uts_name: UtsName, owner: Arc<UserNamespace>) -> Arc<Self> {
|
||||||
|
Arc::new_cyclic(|weak_self| {
|
||||||
|
let path = NsFs::new_path(weak_self.clone());
|
||||||
|
Self {
|
||||||
uts_name: RwMutex::new(uts_name),
|
uts_name: RwMutex::new(uts_name),
|
||||||
owner,
|
owner,
|
||||||
})
|
path,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,10 +60,7 @@ impl UtsNamespace {
|
||||||
posix_thread: &PosixThread,
|
posix_thread: &PosixThread,
|
||||||
) -> Result<Arc<Self>> {
|
) -> Result<Arc<Self>> {
|
||||||
owner.check_cap(CapSet::SYS_ADMIN, posix_thread)?;
|
owner.check_cap(CapSet::SYS_ADMIN, posix_thread)?;
|
||||||
Ok(Arc::new(Self {
|
Ok(Self::new(*self.uts_name.read(), owner))
|
||||||
uts_name: RwMutex::new(*self.uts_name.read()),
|
|
||||||
owner,
|
|
||||||
}))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the owner user namespace of the namespace.
|
/// Returns the owner user namespace of the namespace.
|
||||||
|
|
@ -195,3 +204,26 @@ impl UtsName {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl NsCommonOps for UtsNamespace {
|
||||||
|
const TYPE: NsType = NsType::Uts;
|
||||||
|
|
||||||
|
fn get_owner_user_ns(&self) -> Result<&Arc<UserNamespace>> {
|
||||||
|
Ok(&self.owner)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_parent(&self) -> Result<Arc<Self>> {
|
||||||
|
return_errno_with_message!(
|
||||||
|
Errno::EINVAL,
|
||||||
|
"UTS namespace does not have parent namespace"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path(&self) -> &Path {
|
||||||
|
&self.path
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,18 @@
|
||||||
use spin::Once;
|
use spin::Once;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
fs::{
|
||||||
|
path::Path,
|
||||||
|
pseudofs::{NsCommonOps, NsFs, NsType},
|
||||||
|
},
|
||||||
prelude::*,
|
prelude::*,
|
||||||
process::{credentials::capabilities::CapSet, posix_thread::PosixThread},
|
process::{Uid, credentials::capabilities::CapSet, posix_thread::PosixThread},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The user namespace.
|
/// The user namespace.
|
||||||
pub struct UserNamespace {
|
pub struct UserNamespace {
|
||||||
_private: (),
|
_private: (),
|
||||||
|
path: Path,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UserNamespace {
|
impl UserNamespace {
|
||||||
|
|
@ -17,7 +22,14 @@ impl UserNamespace {
|
||||||
pub fn get_init_singleton() -> &'static Arc<UserNamespace> {
|
pub fn get_init_singleton() -> &'static Arc<UserNamespace> {
|
||||||
static INIT: Once<Arc<UserNamespace>> = Once::new();
|
static INIT: Once<Arc<UserNamespace>> = Once::new();
|
||||||
|
|
||||||
INIT.call_once(|| Arc::new(UserNamespace { _private: () }))
|
INIT.call_once(Self::new)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new() -> Arc<Self> {
|
||||||
|
Arc::new_cyclic(|weak_self| {
|
||||||
|
let path = NsFs::new_path(weak_self.clone());
|
||||||
|
Self { _private: (), path }
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether the thread has the required capability in this user namespace.
|
/// Checks whether the thread has the required capability in this user namespace.
|
||||||
|
|
@ -37,4 +49,47 @@ impl UserNamespace {
|
||||||
"the thread does not have the required capability"
|
"the thread does not have the required capability"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the owner UID of the user namespace.
|
||||||
|
pub fn get_owner_uid(&self) -> Result<Uid> {
|
||||||
|
// FIXME: The owner of the user namespace is not yet tracked.
|
||||||
|
// Return the correct user ID once ownership tracking is implemented.
|
||||||
|
Ok(Uid::new_root())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether this namespace is the same as, or an ancestor of, the other namespace.
|
||||||
|
pub fn is_same_or_ancestor_of(self: &Arc<Self>, other: &Arc<Self>) -> bool {
|
||||||
|
// FIXME: Creating new user namespaces is not yet supported,
|
||||||
|
// so we simply check pointer equality.
|
||||||
|
// Once user namespace creation is implemented,
|
||||||
|
// this should walk up the ancestor chain to verify
|
||||||
|
// whether `self` is an ancestor of `other`.
|
||||||
|
Arc::ptr_eq(self, other)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NsCommonOps for UserNamespace {
|
||||||
|
const TYPE: NsType = NsType::User;
|
||||||
|
|
||||||
|
fn get_owner_user_ns(&self) -> Result<&Arc<UserNamespace>> {
|
||||||
|
return_errno_with_message!(
|
||||||
|
Errno::EPERM,
|
||||||
|
"a user namespace does not have an owner user namespace"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_parent(&self) -> Result<Arc<Self>> {
|
||||||
|
return_errno_with_message!(
|
||||||
|
Errno::EPERM,
|
||||||
|
"getting the parent of a user namespace is not supported"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn path(&self) -> &Path {
|
||||||
|
&self.path
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue