Enable process management for cgroup
parent cda06613f0
commit 77fb73bdd7
@ -39,7 +39,9 @@ use spin::Once;
 
 pub use self::{
     attr::{SysAttr, SysAttrSet, SysAttrSetBuilder},
-    node::{SysBranchNode, SysNode, SysNodeId, SysNodeType, SysObj, SysPerms, SysSymlink},
+    node::{
+        SysBranchNode, SysNode, SysNodeId, SysNodeType, SysObj, SysPerms, SysSymlink, MAX_ATTR_SIZE,
+    },
     tree::SysTree,
     utils::{
         AttrLessBranchNodeFields, BranchNodeFields, EmptyNode, NormalNodeFields, ObjFields,
@ -95,6 +97,8 @@ pub enum Error {
     Overflow,
     /// Page fault occurred during memory access
     PageFault,
+    /// The current systree item is dead
+    IsDead,
 }
 
 impl core::fmt::Display for Error {
@ -108,6 +112,7 @@ impl core::fmt::Display for Error {
             Error::AlreadyExists => write!(f, "The systree item already exists"),
             Error::Overflow => write!(f, "Numerical overflow occurred"),
             Error::PageFault => write!(f, "Page fault occurred during memory access"),
+            Error::IsDead => write!(f, "The current systree item is dead"),
         }
     }
 }
@ -347,6 +347,7 @@ impl From<aster_systree::Error> for Error {
             AlreadyExists => Error::new(Errno::EEXIST),
             Overflow => Error::new(Errno::EOVERFLOW),
             PageFault => Error::new(Errno::EFAULT),
+            IsDead => Error::new(Errno::ENODEV),
         }
     }
 }
@ -6,10 +6,15 @@ use ostd::sync::RwLock;
 
 use super::fs::CgroupFs;
 use crate::{
-    fs::utils::{
-        systree_inode::{SysTreeInodeTy, SysTreeNodeKind},
-        FileSystem, Inode, InodeMode, Metadata,
+    fs::{
+        cgroupfs::CgroupNode,
+        path::{is_dot, is_dotdot},
+        utils::{
+            systree_inode::{SysTreeInodeTy, SysTreeNodeKind},
+            FileSystem, Inode, InodeMode, Metadata,
+        },
     },
     prelude::*,
     Result,
 };
 
@ -78,4 +83,33 @@ impl Inode for CgroupInode {
     fn fs(&self) -> Arc<dyn FileSystem> {
         CgroupFs::singleton().clone()
     }
+
+    fn rmdir(&self, name: &str) -> Result<()> {
+        if is_dot(name) {
+            return_errno_with_message!(Errno::EINVAL, "rmdir on .");
+        }
+        if is_dotdot(name) {
+            return_errno_with_message!(Errno::ENOTEMPTY, "rmdir on ..");
+        }
+
+        let SysTreeNodeKind::Branch(branch_node) = self.node_kind() else {
+            return_errno_with_message!(Errno::ENOTDIR, "the current node is not a branch node");
+        };
+
+        let Some(child) = branch_node.child(name) else {
+            return_errno_with_message!(Errno::ENOENT, "the child node does not exist");
+        };
+
+        let target_node = child.as_any().downcast_ref::<CgroupNode>().unwrap();
+
+        // This will succeed only if the child is empty and has not been removed.
+        target_node.mark_as_dead()?;
+
+        // This is guaranteed to remove `child` because the dentry lock prevents
+        // concurrent modification to the children, and there are no races because
+        // `mark_as_dead` can succeed at most once.
+        branch_node.remove_child(name).unwrap();
+
+        Ok(())
+    }
 }
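The ordering in the rmdir path above (mark the cgroup node dead first, then detach it from its parent) is what keeps removal race-free. Below is a minimal standalone sketch of that same idiom; the Node and rmdir names are illustrative only and plain std locks stand in for the kernel's RwMutex and dentry lock.

// Standalone sketch (not the kernel's real types): "mark dead, then detach".
use std::{collections::BTreeMap, sync::Mutex};

struct Node {
    // `None` means the node has been marked dead.
    members: Mutex<Option<Vec<u32>>>, // PIDs bound to this node
}

impl Node {
    // Succeeds at most once, and only while the node has no members.
    fn mark_as_dead(&self) -> Result<(), &'static str> {
        let mut members = self.members.lock().unwrap();
        let pids = members.as_ref().ok_or("already dead")?;
        if !pids.is_empty() {
            return Err("busy");
        }
        *members = None; // From now on, nothing can be added to this node.
        Ok(())
    }
}

fn rmdir(children: &Mutex<BTreeMap<String, Node>>, name: &str) -> Result<(), &'static str> {
    // The children lock plays the role of the dentry lock above.
    let mut children = children.lock().unwrap();
    children.get(name).ok_or("no such child")?.mark_as_dead()?;
    // Safe to remove: the child is dead, so no member can appear afterwards.
    children.remove(name);
    Ok(())
}

fn main() {
    let children = Mutex::new(BTreeMap::from([(
        "workers".to_string(),
        Node { members: Mutex::new(Some(Vec::new())) },
    )]));
    assert!(rmdir(&children, "workers").is_ok());
    assert!(rmdir(&children, "workers").is_err()); // already removed
}

Because marking can succeed at most once and refuses non-empty nodes, the later removal can never detach a directory that still has live members.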
@ -1,6 +1,7 @@
 // SPDX-License-Identifier: MPL-2.0
 
 use fs::CgroupFsType;
+pub use systree_node::{CgroupMembership, CgroupNode};
 
 mod fs;
 mod inode;
@ -4,16 +4,127 @@ use alloc::{
     string::ToString,
     sync::{Arc, Weak},
 };
-use core::fmt::Debug;
+use core::{
+    fmt::Debug,
+    sync::atomic::{AtomicUsize, Ordering},
+};
 
 use aster_systree::{
     inherit_sys_branch_node, BranchNodeFields, Error, Result, SysAttrSetBuilder, SysBranchNode,
-    SysObj, SysPerms, SysStr,
+    SysObj, SysPerms, SysStr, MAX_ATTR_SIZE,
 };
+use aster_util::printer::VmPrinter;
 use inherit_methods_macro::inherit_methods;
 use ostd::mm::{VmReader, VmWriter};
 use spin::Once;
 
+use crate::{
+    prelude::*,
+    process::{process_table, Pid, Process},
+};
+
+/// A type that provides exclusive, synchronized access to modify cgroup membership.
+///
+/// This struct encapsulates the logic for moving processes between cgroups.
+/// By calling `CgroupMembership::lock()`, a thread can attempt to acquire a lock
+/// on the global instance. Upon success, it returns a guard that provides mutable
+/// access, allowing for safe cgroup membership modifications.
+///
+/// # Usage
+///
+/// ```rust,ignore
+/// // Acquire the lock.
+/// let membership = CgroupMembership::lock();
+///
+/// // Move a process to a new cgroup node.
+/// membership.move_process_to_node(process, &new_cgroup);
+///
+/// // The lock is automatically released when `membership` is dropped.
+/// ```
+pub struct CgroupMembership {
+    _private: (),
+}
+
+impl CgroupMembership {
+    /// Acquires the lock on the global instance.
+    ///
+    /// Returns a guard that provides mutable access to modify cgroup membership.
+    pub fn lock() -> MutexGuard<'static, Self> {
+        static CGROUP_MEMBERSHIP: Mutex<CgroupMembership> =
+            Mutex::new(CgroupMembership { _private: () });
+
+        CGROUP_MEMBERSHIP.lock()
+    }
+
+    /// Moves a process to the new cgroup node.
+    ///
+    /// A process can only belong to one cgroup at a time.
+    /// When moved to a new cgroup, it's automatically removed from the
+    /// previous one.
+    pub fn move_process_to_node(
+        &mut self,
+        process: Arc<Process>,
+        new_cgroup: &CgroupNode,
+    ) -> Result<()> {
+        if let Some(old_cgroup) = process.cgroup().get() {
+            // Fast path: If the process is already in this cgroup, do nothing.
+            if new_cgroup.id() == old_cgroup.id() {
+                return Ok(());
+            }
+
+            old_cgroup
+                .with_inner_mut(|old_cgroup_processes| {
+                    old_cgroup_processes.remove(&process.pid()).unwrap();
+                    if old_cgroup_processes.is_empty() {
+                        let old_count = old_cgroup.populated_count.fetch_sub(1, Ordering::Relaxed);
+                        if old_count == 1 {
+                            old_cgroup.propagate_sub_populated();
+                        }
+                    }
+                })
+                .unwrap();
+        };
+
+        new_cgroup
+            .with_inner_mut(|current_processes| {
+                if current_processes.is_empty() {
+                    let old_count = new_cgroup.populated_count.fetch_add(1, Ordering::Relaxed);
+                    if old_count == 0 {
+                        new_cgroup.propagate_add_populated();
+                    }
+                }
+                current_processes.insert(process.pid(), Arc::downgrade(&process));
+            })
+            .ok_or(Error::IsDead)?;
+
+        process.set_cgroup(Some(new_cgroup.fields.weak_self().upgrade().unwrap()));
+
+        Ok(())
+    }
+
+    /// Moves a process to the root cgroup.
+    pub fn move_process_to_root(&mut self, process: &Process) {
+        let process_cgroup = process.cgroup();
+        let Some(old_cgroup) = process_cgroup.get() else {
+            return;
+        };
+
+        old_cgroup
+            .with_inner_mut(|old_cgroup_processes| {
+                old_cgroup_processes.remove(&process.pid()).unwrap();
+                if old_cgroup_processes.is_empty() {
+                    let old_count = old_cgroup.populated_count.fetch_sub(1, Ordering::Relaxed);
+                    if old_count == 1 {
+                        old_cgroup.propagate_sub_populated();
+                    }
+                }
+            })
+            .unwrap();
+
+        process.set_cgroup(None);
+    }
+}
+
 /// The root of a cgroup hierarchy, serving as the entry point to
 /// the entire cgroup control system.
 ///
@ -29,9 +140,39 @@ pub(super) struct CgroupSystem {
 /// Each node can bind a group of processes together for purpose of resource
 /// management. Except for the root node, all nodes in the cgroup tree are of
 /// this type.
-#[derive(Debug)]
-struct CgroupNode {
+pub struct CgroupNode {
     fields: BranchNodeFields<CgroupNode, Self>,
+    /// The inner data. If it is `None`, then the cgroup node is dead.
+    inner: RwMutex<Option<Inner>>,
+    /// The depth of the node in the cgroupfs [`SysTree`], where the child of
+    /// the root node has a depth of 1.
+    depth: usize,
+    /// Tracks the "populated" status of this node and its direct children.
+    ///
+    /// The count is the sum of:
+    /// - The number of its direct children that are populated.
+    /// - A value of 1 if this node itself contains processes.
+    ///
+    /// "populated": A node is considered populated if it has bound processes
+    /// either on itself or in any of its descendant nodes. Consequently,
+    /// a count > 0 indicates that this node is populated.
+    populated_count: AtomicUsize,
 }
 
+impl Debug for CgroupNode {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        f.debug_struct("CgroupNode")
+            .field("fields", &self.fields)
+            .field("populated_count", &self.populated_count)
+            .field("depth", &self.depth)
+            .finish_non_exhaustive()
+    }
+}
+
+#[derive(Default)]
+struct Inner {
+    /// Processes bound to the cgroup node.
+    processes: BTreeMap<Pid, Weak<Process>>,
+}
+
 #[inherit_methods(from = "self.fields")]
@ -67,6 +208,10 @@ impl CgroupSystem {
             SysStr::from("cgroup.max.depth"),
             SysPerms::DEFAULT_RW_ATTR_PERMS,
         );
+        builder.add(
+            SysStr::from("cgroup.procs"),
+            SysPerms::DEFAULT_RW_ATTR_PERMS,
+        );
         builder.add(
             SysStr::from("cgroup.threads"),
             SysPerms::DEFAULT_RW_ATTR_PERMS,
@ -86,7 +231,7 @@ impl CgroupSystem {
 }
 
 impl CgroupNode {
-    pub(self) fn new(name: SysStr) -> Arc<Self> {
+    pub(self) fn new(name: SysStr, depth: usize) -> Arc<Self> {
         let mut builder = SysAttrSetBuilder::new();
         // TODO: Add more attributes as needed. The normal cgroup node may have
         // more attributes than the unified one.
@ -98,6 +243,10 @@ impl CgroupNode {
             SysStr::from("cgroup.max.depth"),
             SysPerms::DEFAULT_RW_ATTR_PERMS,
         );
+        builder.add(
+            SysStr::from("cgroup.procs"),
+            SysPerms::DEFAULT_RW_ATTR_PERMS,
+        );
         builder.add(
             SysStr::from("cgroup.threads"),
             SysPerms::DEFAULT_RW_ATTR_PERMS,
@ -107,15 +256,125 @@ impl CgroupNode {
             SysPerms::DEFAULT_RW_ATTR_PERMS,
         );
         builder.add(SysStr::from("cpu.stat"), SysPerms::DEFAULT_RO_ATTR_PERMS);
+        builder.add(
+            SysStr::from("cgroup.events"),
+            SysPerms::DEFAULT_RO_ATTR_PERMS,
+        );
 
         let attrs = builder.build().expect("Failed to build attribute set");
         Arc::new_cyclic(|weak_self| {
             let fields = BranchNodeFields::new(name, attrs, weak_self.clone());
-            CgroupNode { fields }
+            CgroupNode {
+                fields,
+                inner: RwMutex::new(Some(Inner::default())),
+                depth,
+                populated_count: AtomicUsize::new(0),
+            }
         })
     }
 }
 
+// For process management
+impl CgroupNode {
+    fn propagate_add_populated(&self) {
+        if self.depth <= 1 {
+            return;
+        }
+
+        let mut current_parent = Arc::downcast::<CgroupNode>(self.parent().unwrap()).unwrap();
+        loop {
+            let old_count = current_parent
+                .populated_count
+                .fetch_add(1, Ordering::Relaxed);
+            if old_count > 0 {
+                break;
+            }
+
+            if current_parent.depth == 1 {
+                break;
+            }
+
+            current_parent = Arc::downcast::<CgroupNode>(current_parent.parent().unwrap()).unwrap();
+        }
+    }
+
+    fn propagate_sub_populated(&self) {
+        if self.depth <= 1 {
+            return;
+        }
+
+        let mut current_parent = Arc::downcast::<CgroupNode>(self.parent().unwrap()).unwrap();
+        loop {
+            let old_count = current_parent
+                .populated_count
+                .fetch_sub(1, Ordering::Relaxed);
+            if old_count != 1 {
+                break;
+            }
+
+            if current_parent.depth == 1 {
+                break;
+            }
+
+            current_parent = Arc::downcast::<CgroupNode>(current_parent.parent().unwrap()).unwrap();
+        }
+    }
+
+    /// Performs a read-only operation on the inner data.
+    ///
+    /// If the cgroup node is dead, returns `None`.
+    fn with_inner<F, R>(&self, op: F) -> Option<R>
+    where
+        F: FnOnce(&BTreeMap<Pid, Weak<Process>>) -> R,
+    {
+        let inner = self.inner.read();
+        let inner_ref = inner.as_ref()?;
+
+        Some(op(&inner_ref.processes))
+    }
+
+    /// Performs a mutable operation on the inner data.
+    ///
+    /// If the cgroup node is dead, returns `None`.
+    fn with_inner_mut<F, R>(&self, op: F) -> Option<R>
+    where
+        F: FnOnce(&mut BTreeMap<Pid, Weak<Process>>) -> R,
+    {
+        let mut inner = self.inner.write();
+        let inner_ref = inner.as_mut()?;
+
+        Some(op(&mut inner_ref.processes))
+    }
+
+    /// Marks this cgroup node as dead.
+    ///
+    /// This will succeed only if the cgroup node is empty and is alive.
+    /// Here, a cgroup node is considered empty if it has no child nodes and no
+    /// processes bound to it.
+    pub(super) fn mark_as_dead(&self) -> crate::Result<()> {
+        let mut inner = self.inner.write();
+        let Some(inner_ref) = inner.as_ref() else {
+            return_errno_with_message!(Errno::ENOENT, "the cgroup node is already dead");
+        };
+
+        if !inner_ref.processes.is_empty() {
+            return_errno_with_message!(Errno::EBUSY, "the cgroup hierarchy still has processes");
+        }
+
+        let children = self.fields.children_ref().read();
+        if !children.is_empty() {
+            return_errno_with_message!(
+                Errno::ENOTEMPTY,
+                "only an empty cgroup hierarchy can be removed"
+            );
+        }
+
+        *inner = None;
+
+        Ok(())
+    }
+}
+
 inherit_sys_branch_node!(CgroupSystem, fields, {
     fn is_root(&self) -> bool {
         true
@ -125,14 +384,50 @@ inherit_sys_branch_node!(CgroupSystem, fields, {
         // This method should be a no-op for `RootNode`.
     }
 
-    fn read_attr_at(&self, _name: &str, _offset: usize, _writer: &mut VmWriter) -> Result<usize> {
-        // TODO: Add support for reading attributes.
-        Err(Error::AttributeError)
+    fn read_attr_at(&self, name: &str, offset: usize, writer: &mut VmWriter) -> Result<usize> {
+        let mut printer = VmPrinter::new_skip(writer, offset);
+        match name {
+            "cgroup.procs" => {
+                let process_table = process_table::process_table_mut();
+                for process in process_table.iter() {
+                    if process.cgroup().is_none() {
+                        writeln!(printer, "{}", process.pid())?;
+                    }
+                }
+            }
+            _ => {
+                // TODO: Add support for reading other attributes.
+                return Err(Error::AttributeError);
+            }
+        }
+
+        Ok(printer.bytes_written())
     }
 
-    fn write_attr(&self, _name: &str, _reader: &mut VmReader) -> Result<usize> {
-        // TODO: Add support for writing attributes.
-        Err(Error::AttributeError)
+    fn write_attr(&self, name: &str, reader: &mut VmReader) -> Result<usize> {
+        match name {
+            "cgroup.procs" => {
+                let (content, len) = reader
+                    .read_cstring_until_end(MAX_ATTR_SIZE)
+                    .map_err(|_| Error::PageFault)?;
+                let pid = content
+                    .to_str()
+                    .ok()
+                    .and_then(|string| string.trim().parse::<Pid>().ok())
+                    .ok_or(Error::InvalidOperation)?;
+
+                with_process_cgroup_locked(pid, |process, cgroup_membership| {
+                    cgroup_membership.move_process_to_root(&process);
+                    Ok(())
+                })?;
+
+                Ok(len)
+            }
+            _ => {
+                // TODO: Add support for writing other attributes.
+                Err(Error::AttributeError)
+            }
+        }
     }
 
     fn perms(&self) -> SysPerms {
@ -140,21 +435,72 @@ inherit_sys_branch_node!(CgroupSystem, fields, {
     }
 
     fn create_child(&self, name: &str) -> Result<Arc<dyn SysObj>> {
-        let new_child = CgroupNode::new(name.to_string().into());
+        let new_child = CgroupNode::new(name.to_string().into(), 1);
         self.add_child(new_child.clone())?;
         Ok(new_child)
     }
 });
 
 inherit_sys_branch_node!(CgroupNode, fields, {
-    fn read_attr_at(&self, _name: &str, _offset: usize, _writer: &mut VmWriter) -> Result<usize> {
-        // TODO: Add support for reading attributes.
-        Err(Error::AttributeError)
+    fn read_attr_at(&self, name: &str, offset: usize, writer: &mut VmWriter) -> Result<usize> {
+        self.with_inner(|processes| {
+            let mut printer = VmPrinter::new_skip(writer, offset);
+            match name {
+                "cgroup.procs" => {
+                    for pid in processes.keys() {
+                        writeln!(printer, "{}", pid)?;
+                    }
+                }
+                "cgroup.events" => {
+                    let res = if self.populated_count.load(Ordering::Relaxed) > 0 {
+                        1
+                    } else {
+                        0
+                    };
+
+                    writeln!(printer, "populated {}", res)?;
+                    // Currently we have not enabled the "frozen" attribute
+                    // so the "frozen" field is always zero.
+                    writeln!(printer, "frozen {}", 0)?;
+                }
+                _ => {
+                    // TODO: Add support for reading other attributes.
+                    return Err(Error::AttributeError);
+                }
+            }
+
+            Ok(printer.bytes_written())
+        })
+        .ok_or(Error::IsDead)?
     }
 
-    fn write_attr(&self, _name: &str, _reader: &mut VmReader) -> Result<usize> {
-        // TODO: Add support for writing attributes.
-        Err(Error::AttributeError)
+    fn write_attr(&self, name: &str, reader: &mut VmReader) -> Result<usize> {
+        match name {
+            "cgroup.procs" => {
+                let (content, len) = reader
+                    .read_cstring_until_end(MAX_ATTR_SIZE)
+                    .map_err(|_| Error::PageFault)?;
+                let pid = content
+                    .to_str()
+                    .ok()
+                    .and_then(|string| string.trim().parse::<Pid>().ok())
+                    .ok_or(Error::InvalidOperation)?;
+
+                with_process_cgroup_locked(pid, |target_process, cgroup_membership| {
+                    // TODO: According to the "no internal processes" rule of cgroupv2
+                    // (Ref: https://man7.org/linux/man-pages/man7/cgroups.7.html),
+                    // if the cgroup node has enabled some controllers like "memory", "io",
+                    // it is forbidden to bind a process to an internal cgroup node.
+                    cgroup_membership.move_process_to_node(target_process, self)
+                })?;
+
+                Ok(len)
+            }
+            _ => {
+                // TODO: Add support for writing other attributes.
+                Err(Error::AttributeError)
+            }
+        }
     }
 
     fn perms(&self) -> SysPerms {
@ -162,8 +508,37 @@ inherit_sys_branch_node!(CgroupNode, fields, {
     }
 
     fn create_child(&self, name: &str) -> Result<Arc<dyn SysObj>> {
-        let new_child = CgroupNode::new(name.to_string().into());
-        self.add_child(new_child.clone())?;
-        Ok(new_child)
+        self.with_inner(|_| {
+            let new_child = CgroupNode::new(name.to_string().into(), self.depth + 1);
+            self.add_child(new_child.clone())?;
+            Ok(new_child as _)
+        })
+        // TODO: This should be checked at upper layers.
+        .ok_or(Error::NotFound)?
     }
 });
+
+/// A helper function to safely perform an operation on a process's cgroup.
+///
+/// The given `pid` means the PID of the target process. A PID of 0 refers to the
+/// current process.
+///
+/// Returns `Error::InvalidOperation` if the PID is not found or if the target
+/// process is a zombie.
+fn with_process_cgroup_locked<F>(pid: Pid, op: F) -> Result<()>
+where
+    F: FnOnce(Arc<Process>, &mut CgroupMembership) -> Result<()>,
+{
+    let process = if pid == 0 {
+        current!()
+    } else {
+        process_table::get_process(pid).ok_or(Error::InvalidOperation)?
+    };
+
+    let mut cgroup_guard = CgroupMembership::lock();
+    if process.status().is_zombie() {
+        return Err(Error::InvalidOperation);
+    }
+
+    op(process, &mut cgroup_guard)
+}
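The populated_count bookkeeping added above only walks up the tree on a node's 0-to-1 or 1-to-0 transition, so each update touches an ancestor at most once. The following is a simplified standalone model of that propagation; the Node type here is hypothetical, and the real code additionally serializes updates through the membership lock and reaches parents through SysObj handles.

// Standalone model of the upward "populated" propagation (simplified).
use std::sync::{
    atomic::{AtomicUsize, Ordering},
    Arc,
};

struct Node {
    parent: Option<Arc<Node>>,
    populated_count: AtomicUsize,
}

// Called when a node transitions from "not populated" to "populated".
fn propagate_add(start: &Node) {
    let mut cur = start.parent.clone();
    while let Some(node) = cur {
        // Only a parent's own 0 -> 1 transition needs to propagate further,
        // so the walk stops as soon as an ancestor was already populated.
        if node.populated_count.fetch_add(1, Ordering::Relaxed) > 0 {
            break;
        }
        cur = node.parent.clone();
    }
}

// Called when a node transitions from "populated" back to "not populated".
fn propagate_sub(start: &Node) {
    let mut cur = start.parent.clone();
    while let Some(node) = cur {
        // Only the 1 -> 0 transition of an ancestor propagates further upwards.
        if node.populated_count.fetch_sub(1, Ordering::Relaxed) != 1 {
            break;
        }
        cur = node.parent.clone();
    }
}

fn main() {
    let root = Arc::new(Node { parent: None, populated_count: AtomicUsize::new(0) });
    let child = Arc::new(Node { parent: Some(root.clone()), populated_count: AtomicUsize::new(0) });

    // The child gains its first process: every previously-zero ancestor becomes populated.
    child.populated_count.fetch_add(1, Ordering::Relaxed);
    propagate_add(&child);
    assert_eq!(root.populated_count.load(Ordering::Relaxed), 1);

    // The last process leaves the child: the 1 -> 0 transition propagates up.
    child.populated_count.fetch_sub(1, Ordering::Relaxed);
    propagate_sub(&child);
    assert_eq!(root.populated_count.load(Ordering::Relaxed), 0);
}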
@ -307,7 +307,9 @@ impl<KInode: SysTreeInodeTy + Send + Sync + 'static> Inode for KInode {
     }
 
     default fn resize(&self, _new_size: usize) -> Result<()> {
-        Err(Error::new(Errno::EPERM))
+        // The `resize` operation should be ignored by kernelfs inodes,
+        // and should not incur an error.
+        Ok(())
     }
 
     default fn atime(&self) -> Duration {
@ -434,6 +436,10 @@ impl<KInode: SysTreeInodeTy + Send + Sync + 'static> Inode for KInode {
         Err(Error::new(Errno::EPERM))
     }
 
+    default fn rmdir(&self, _name: &str) -> Result<()> {
+        Err(Error::new(Errno::EPERM))
+    }
+
     default fn rename(
         &self,
         _old_name: &str,
@ -18,6 +18,7 @@ use crate::{
     cpu::LinuxAbi,
     current_userspace,
     fs::{
+        cgroupfs::CgroupMembership,
         file_table::{FdFlags, FileTable},
         thread_info::ThreadFsInfo,
     },
@ -288,6 +289,15 @@ pub fn clone_child(
         Ok(child_tid)
     } else {
         let child_process = clone_child_process(ctx, parent_context, clone_args)?;
+
+        let mut cgroup_guard = CgroupMembership::lock();
+        if let Some(cgroup) = ctx.process.cgroup().get() {
+            cgroup_guard
+                .move_process_to_node(child_process.clone(), &cgroup)
+                .unwrap();
+        }
+        drop(cgroup_guard);
+
         if clone_args.flags.contains(CloneFlags::CLONE_VFORK) {
             child_process.status().set_vfork_child(true);
         }
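A rough standalone sketch of the inheritance rule implemented in this clone path: a freshly cloned process starts in its parent's cgroup, and the copy happens under a single membership lock so it cannot race with a concurrent migration. The Cgroup and Process types below are illustrative stand-ins, not the kernel's.

// Illustrative model: child inherits the parent's cgroup under one global lock.
use std::sync::{Arc, Mutex};

#[derive(Clone, PartialEq, Debug)]
struct Cgroup(&'static str);

struct Process {
    cgroup: Mutex<Option<Cgroup>>, // `None` = root cgroup
}

// Stand-in for `CgroupMembership::lock()`: one global lock serializes
// every membership change.
static MEMBERSHIP: Mutex<()> = Mutex::new(());

fn clone_process(parent: &Process) -> Arc<Process> {
    let child = Arc::new(Process { cgroup: Mutex::new(None) });

    // Take the membership lock, then copy the parent's current cgroup.
    let _guard = MEMBERSHIP.lock().unwrap();
    if let Some(cgroup) = parent.cgroup.lock().unwrap().clone() {
        *child.cgroup.lock().unwrap() = Some(cgroup);
    }
    child
}

fn main() {
    let parent = Process { cgroup: Mutex::new(Some(Cgroup("workers"))) };
    let child = clone_process(&parent);
    assert_eq!(*child.cgroup.lock().unwrap(), Some(Cgroup("workers")));
}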
@ -3,7 +3,10 @@
 use core::sync::atomic::Ordering;
 
 use super::{process_table, Pid, Process};
-use crate::{events::IoEvents, prelude::*, process::signal::signals::kernel::KernelSignal};
+use crate::{
+    events::IoEvents, fs::cgroupfs::CgroupMembership, prelude::*,
+    process::signal::signals::kernel::KernelSignal,
+};
 
 /// Exits the current POSIX process.
 ///
@ -26,6 +29,11 @@ pub(super) fn exit_process(current_process: &Process) {
     move_children_to_reaper_process(current_process);
 
     send_child_death_signal(current_process);
+
+    // Remove the process from the cgroup.
+    let mut cgroup_guard = CgroupMembership::lock();
+    cgroup_guard.move_process_to_root(current_process);
+    drop(cgroup_guard);
 }
 
 /// Sends parent-death signals to the children.
@ -20,6 +20,7 @@ use super::{
     task_set::TaskSet,
 };
 use crate::{
+    fs::cgroupfs::CgroupNode,
     prelude::*,
     process::{
         signal::{sig_queues::SigQueues, Pollee},
@ -42,7 +43,10 @@ mod timer_manager;
 use atomic_integer_wrapper::define_atomic_version_of_integer_like_type;
 pub use init_proc::spawn_init_process;
 pub use job_control::JobControl;
-use ostd::{sync::WaitQueue, task::Task};
+use ostd::{
+    sync::{RcuOption, RcuOptionReadGuard, WaitQueue},
+    task::Task,
+};
 pub use process_group::ProcessGroup;
 pub use session::Session;
 pub use terminal::Terminal;
@ -98,6 +102,10 @@ pub struct Process {
     reaped_children_stats: Mutex<ReapedChildrenStats>,
     /// resource limits
     resource_limits: ResourceLimits,
+    /// The bound cgroup of the process.
+    ///
+    /// If this field is `None`, the process is bound to the root cgroup.
+    cgroup: RcuOption<Arc<CgroupNode>>,
     /// Scheduling priority nice value
     /// According to POSIX.1, the nice value is a per-process attribute,
     /// the threads in a process should share a nice value.
@ -243,6 +251,7 @@ impl Process {
             parent_death_signal: AtomicSigNum::new_empty(),
             exit_signal: AtomicSigNum::new_empty(),
             resource_limits,
+            cgroup: RcuOption::new(None),
             nice: AtomicNice::new(nice),
             oom_score_adj: AtomicI16::new(oom_score_adj),
             timer_manager: PosixTimerManager::new(&prof_clock, process_ref),
@ -786,6 +795,28 @@ impl Process {
     pub fn user_ns(&self) -> &Mutex<Arc<UserNamespace>> {
         &self.user_ns
     }
+
+    // ******************* cgroup ********************
+
+    /// Returns a RCU read guard to the cgroup of the process.
+    ///
+    /// The returned cgroup is not a stable snapshot. It may be changed by other threads
+    /// and encounter race conditions. Users can use [`lock_cgroup_membership`] to obtain
+    /// a lock to prevent the cgroup from being changed.
+    ///
+    /// [`lock_cgroup_membership`]: crate::fs::cgroupfs::lock_cgroup_membership
+    pub fn cgroup(&self) -> RcuOptionReadGuard<Arc<CgroupNode>> {
+        self.cgroup.read()
+    }
+
+    /// Sets the cgroup for this process.
+    ///
+    /// Note: This function should only be called within the cgroup module.
+    /// Arbitrary calls may likely cause race conditions.
+    #[doc(hidden)]
+    pub fn set_cgroup(&self, cgroup: Option<Arc<CgroupNode>>) {
+        self.cgroup.update(cgroup);
+    }
 }
 
 /// Enqueues a process-directed kernel signal asynchronously.
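The new cgroup field on Process follows a "None means the root cgroup" convention and is read through an RCU guard. A rough userspace analogue is sketched below; RwLock stands in for the kernel's RcuOption, and the node type is reduced to a bare name, so this only illustrates the convention rather than the real API.

// Rough analogue of the `cgroup` field and its accessors (RwLock in place of RCU).
use std::sync::{Arc, RwLock};

struct CgroupNode {
    name: String,
}

struct Process {
    // `None` means "bound to the root cgroup", mirroring the kernel field above.
    cgroup: RwLock<Option<Arc<CgroupNode>>>,
}

impl Process {
    fn cgroup_name(&self) -> String {
        match self.cgroup.read().unwrap().as_ref() {
            Some(node) => node.name.clone(),
            None => "<root>".to_string(),
        }
    }

    fn set_cgroup(&self, cgroup: Option<Arc<CgroupNode>>) {
        *self.cgroup.write().unwrap() = cgroup;
    }
}

fn main() {
    let p = Process { cgroup: RwLock::new(None) };
    assert_eq!(p.cgroup_name(), "<root>");
    p.set_cgroup(Some(Arc::new(CgroupNode { name: "workers".into() })));
    assert_eq!(p.cgroup_name(), "workers");
}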