feat(procfs): add /proc/sys/net/ipv4/ip_local_port_range interface (#1749)
* feat(procfs): add /proc/sys/net/ipv4/ip_local_port_range interface - Add net module to procfs sys directory with IPv4 support - Implement ip_local_port_range file for reading/writing port range configuration - Update PortManager to support configurable local port range with atomic operations - Extend ephemeral port allocation to use configured range instead of hardcoded values - Add gvisor test cases for IPv4 UDP socket functionality Signed-off-by: longjin <longjin@DragonOS.org>
This commit is contained in:
parent
03c943ae77
commit
6e8e8c6411
|
|
@ -246,6 +246,7 @@ pub struct IfaceCommon {
|
|||
tcp_close_defer: crate::net::tcp_close_defer::TcpCloseDefer,
|
||||
/// TCP listener/backlog 语义辅助(Linux-like 丢 SYN 等)。
|
||||
tcp_listener_backlog: crate::net::tcp_listener_backlog::TcpListenerBacklog,
|
||||
ipv4_multicast_refcnt: Mutex<Vec<(smoltcp::wire::Ipv4Address, usize)>>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for IfaceCommon {
|
||||
|
|
@ -284,6 +285,7 @@ impl IfaceCommon {
|
|||
packet_sockets: RwLock::new(Vec::new()),
|
||||
tcp_close_defer: crate::net::tcp_close_defer::TcpCloseDefer::new(),
|
||||
tcp_listener_backlog: crate::net::tcp_listener_backlog::TcpListenerBacklog::new(),
|
||||
ipv4_multicast_refcnt: Mutex::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -298,6 +300,38 @@ impl IfaceCommon {
|
|||
self.tcp_listener_backlog.unregister_tcp_listen_port(port);
|
||||
}
|
||||
|
||||
pub fn ipv4_multicast_join_ref(
|
||||
&self,
|
||||
group: smoltcp::wire::Ipv4Address,
|
||||
) -> Result<(), smoltcp::iface::MulticastError> {
|
||||
let mut guard = self.ipv4_multicast_refcnt.lock();
|
||||
if let Some((_, ref mut cnt)) = guard.iter_mut().find(|(g, _)| *g == group) {
|
||||
*cnt = cnt.saturating_add(1);
|
||||
return Ok(());
|
||||
}
|
||||
self.smol_iface
|
||||
.lock()
|
||||
.join_multicast_group(smoltcp::wire::IpAddress::Ipv4(group))?;
|
||||
guard.push((group, 1));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn ipv4_multicast_leave_ref(&self, group: smoltcp::wire::Ipv4Address) {
|
||||
let mut guard = self.ipv4_multicast_refcnt.lock();
|
||||
let Some(pos) = guard.iter().position(|(g, _)| *g == group) else {
|
||||
return;
|
||||
};
|
||||
if guard[pos].1 > 1 {
|
||||
guard[pos].1 -= 1;
|
||||
return;
|
||||
}
|
||||
guard.swap_remove(pos);
|
||||
let _ = self
|
||||
.smol_iface
|
||||
.lock()
|
||||
.leave_multicast_group(smoltcp::wire::IpAddress::Ipv4(group));
|
||||
}
|
||||
|
||||
/// 驱动收包入口使用的通用丢包策略(避免驱动理解 L4 语义)。
|
||||
#[inline]
|
||||
pub fn should_drop_rx_packet(&self, packet: &[u8]) -> bool {
|
||||
|
|
@ -350,8 +384,7 @@ impl IfaceCommon {
|
|||
|
||||
// Reclaim TCP sockets that have fully closed.
|
||||
// Lock order: sockets -> tcp_close_defer (matches close path, which may touch sockets then defer close).
|
||||
self.tcp_close_defer
|
||||
.reap_closed(&mut sockets, &self.port_manager);
|
||||
self.tcp_close_defer.reap_closed(&mut sockets);
|
||||
|
||||
// drop sockets here to avoid deadlock
|
||||
drop(interface);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
//! 提供类似 Linux 的 /proc/sys 接口,支持动态配置内核参数
|
||||
|
||||
mod kernel;
|
||||
mod net;
|
||||
mod vm;
|
||||
|
||||
use crate::filesystem::{
|
||||
|
|
@ -14,6 +15,7 @@ use alloc::{
|
|||
sync::{Arc, Weak},
|
||||
};
|
||||
use kernel::KernelDirOps;
|
||||
use net::NetDirOps;
|
||||
use system_error::SystemError;
|
||||
use vm::VmDirOps;
|
||||
|
||||
|
|
@ -58,6 +60,16 @@ impl DirOps for SysDirOps {
|
|||
cached_children.insert(name.to_string(), inode.clone());
|
||||
return Ok(inode);
|
||||
}
|
||||
if name == "net" {
|
||||
let mut cached_children = dir.cached_children().write();
|
||||
if let Some(child) = cached_children.get(name) {
|
||||
return Ok(child.clone());
|
||||
}
|
||||
|
||||
let inode = NetDirOps::new_inode(dir.self_ref_weak().clone());
|
||||
cached_children.insert(name.to_string(), inode.clone());
|
||||
return Ok(inode);
|
||||
}
|
||||
|
||||
Err(SystemError::ENOENT)
|
||||
}
|
||||
|
|
@ -70,5 +82,8 @@ impl DirOps for SysDirOps {
|
|||
cached_children
|
||||
.entry("vm".to_string())
|
||||
.or_insert_with(|| VmDirOps::new_inode(dir.self_ref_weak().clone()));
|
||||
cached_children
|
||||
.entry("net".to_string())
|
||||
.or_insert_with(|| NetDirOps::new_inode(dir.self_ref_weak().clone()));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
use crate::filesystem::{
|
||||
procfs::{
|
||||
template::{Builder, DirOps, FileOps, ProcDir, ProcDirBuilder, ProcFileBuilder},
|
||||
utils::proc_read,
|
||||
},
|
||||
vfs::{FilePrivateData, IndexNode, InodeMode},
|
||||
};
|
||||
use crate::libs::mutex::MutexGuard;
|
||||
use crate::net::socket::inet::common::port::PortManager;
|
||||
use alloc::{
|
||||
format,
|
||||
string::{String, ToString},
|
||||
sync::{Arc, Weak},
|
||||
vec::Vec,
|
||||
};
|
||||
use system_error::SystemError;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Ipv4DirOps;
|
||||
|
||||
impl Ipv4DirOps {
|
||||
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
|
||||
ProcDirBuilder::new(Self, InodeMode::from_bits_truncate(0o555))
|
||||
.parent(parent)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl DirOps for Ipv4DirOps {
|
||||
fn lookup_child(
|
||||
&self,
|
||||
dir: &ProcDir<Self>,
|
||||
name: &str,
|
||||
) -> Result<Arc<dyn IndexNode>, SystemError> {
|
||||
if name == "ip_local_port_range" {
|
||||
let mut cached_children = dir.cached_children().write();
|
||||
if let Some(child) = cached_children.get(name) {
|
||||
return Ok(child.clone());
|
||||
}
|
||||
|
||||
let inode = IpLocalPortRangeFileOps::new_inode(dir.self_ref_weak().clone());
|
||||
cached_children.insert(name.to_string(), inode.clone());
|
||||
return Ok(inode);
|
||||
}
|
||||
|
||||
Err(SystemError::ENOENT)
|
||||
}
|
||||
|
||||
fn populate_children(&self, dir: &ProcDir<Self>) {
|
||||
let mut cached_children = dir.cached_children().write();
|
||||
cached_children
|
||||
.entry("ip_local_port_range".to_string())
|
||||
.or_insert_with(|| IpLocalPortRangeFileOps::new_inode(dir.self_ref_weak().clone()));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IpLocalPortRangeFileOps;
|
||||
|
||||
impl IpLocalPortRangeFileOps {
|
||||
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
|
||||
ProcFileBuilder::new(Self, InodeMode::from_bits_truncate(0o644))
|
||||
.parent(parent)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn read_config() -> String {
|
||||
let (min, max) = PortManager::local_port_range();
|
||||
format!("{} {}\n", min, max)
|
||||
}
|
||||
|
||||
fn write_config(data: &[u8]) -> Result<usize, SystemError> {
|
||||
let input = core::str::from_utf8(data).map_err(|_| SystemError::EINVAL)?;
|
||||
let parts: Vec<&str> = input.split_whitespace().collect();
|
||||
if parts.len() < 2 {
|
||||
return Err(SystemError::EINVAL);
|
||||
}
|
||||
let min: u16 = parts[0].parse().map_err(|_| SystemError::EINVAL)?;
|
||||
let max: u16 = parts[1].parse().map_err(|_| SystemError::EINVAL)?;
|
||||
PortManager::set_local_port_range(min, max)?;
|
||||
Ok(data.len())
|
||||
}
|
||||
}
|
||||
|
||||
impl FileOps for IpLocalPortRangeFileOps {
|
||||
fn read_at(
|
||||
&self,
|
||||
offset: usize,
|
||||
len: usize,
|
||||
buf: &mut [u8],
|
||||
_data: MutexGuard<FilePrivateData>,
|
||||
) -> Result<usize, SystemError> {
|
||||
let content = Self::read_config();
|
||||
proc_read(offset, len, buf, content.as_bytes())
|
||||
}
|
||||
|
||||
fn write_at(
|
||||
&self,
|
||||
_offset: usize,
|
||||
_len: usize,
|
||||
buf: &[u8],
|
||||
_data: MutexGuard<FilePrivateData>,
|
||||
) -> Result<usize, SystemError> {
|
||||
Self::write_config(buf)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
mod ipv4;
|
||||
|
||||
use crate::filesystem::{
|
||||
procfs::template::{DirOps, ProcDir, ProcDirBuilder},
|
||||
vfs::{IndexNode, InodeMode},
|
||||
};
|
||||
use alloc::string::ToString;
|
||||
use alloc::sync::{Arc, Weak};
|
||||
use ipv4::Ipv4DirOps;
|
||||
use system_error::SystemError;
|
||||
|
||||
use crate::filesystem::procfs::Builder;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NetDirOps;
|
||||
|
||||
impl NetDirOps {
|
||||
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
|
||||
ProcDirBuilder::new(Self, InodeMode::from_bits_truncate(0o555))
|
||||
.parent(parent)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl DirOps for NetDirOps {
|
||||
fn lookup_child(
|
||||
&self,
|
||||
dir: &ProcDir<Self>,
|
||||
name: &str,
|
||||
) -> Result<Arc<dyn IndexNode>, SystemError> {
|
||||
if name == "ipv4" {
|
||||
let mut cached_children = dir.cached_children().write();
|
||||
if let Some(child) = cached_children.get(name) {
|
||||
return Ok(child.clone());
|
||||
}
|
||||
|
||||
let inode = Ipv4DirOps::new_inode(dir.self_ref_weak().clone());
|
||||
cached_children.insert(name.to_string(), inode.clone());
|
||||
return Ok(inode);
|
||||
}
|
||||
|
||||
Err(SystemError::ENOENT)
|
||||
}
|
||||
|
||||
fn populate_children(&self, dir: &ProcDir<Self>) {
|
||||
let mut cached_children = dir.cached_children().write();
|
||||
cached_children
|
||||
.entry("ipv4".to_string())
|
||||
.or_insert_with(|| Ipv4DirOps::new_inode(dir.self_ref_weak().clone()));
|
||||
}
|
||||
}
|
||||
|
|
@ -111,10 +111,7 @@ pub fn drop_ipv4_memberships(
|
|||
if let Some(iface) = find_iface_by_ifindex(netns, entry.ifindex) {
|
||||
let bytes = entry.multiaddr.to_ne_bytes();
|
||||
let multi = Ipv4Address::new(bytes[0], bytes[1], bytes[2], bytes[3]);
|
||||
let _ = iface
|
||||
.smol_iface()
|
||||
.lock()
|
||||
.leave_multicast_group(IpAddress::Ipv4(multi));
|
||||
iface.common().ipv4_multicast_leave_ref(multi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -247,10 +244,7 @@ pub fn apply_ipv4_membership(
|
|||
});
|
||||
}
|
||||
|
||||
let join_result = {
|
||||
let mut smol_iface = iface.smol_iface().lock();
|
||||
smol_iface.join_multicast_group(IpAddress::Ipv4(multi_ipv4))
|
||||
};
|
||||
let join_result = iface.common().ipv4_multicast_join_ref(multi_ipv4);
|
||||
if let Err(e) = join_result {
|
||||
{
|
||||
let mut groups = groups.lock();
|
||||
|
|
@ -265,7 +259,7 @@ pub fn apply_ipv4_membership(
|
|||
Ok(())
|
||||
}
|
||||
IpOption::DROP_MEMBERSHIP => {
|
||||
let (did_remove, still_joined) = {
|
||||
let did_remove = {
|
||||
let mut groups = groups.lock();
|
||||
let pos = groups.iter().position(|g| {
|
||||
if g.multiaddr != multi {
|
||||
|
|
@ -281,12 +275,9 @@ pub fn apply_ipv4_membership(
|
|||
});
|
||||
if let Some(idx) = pos {
|
||||
groups.swap_remove(idx);
|
||||
let still_joined = groups
|
||||
.iter()
|
||||
.any(|g| g.multiaddr == multi && g.ifindex == resolved_ifindex);
|
||||
(true, still_joined)
|
||||
true
|
||||
} else {
|
||||
(false, false)
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -294,12 +285,7 @@ pub fn apply_ipv4_membership(
|
|||
return Err(SystemError::EADDRNOTAVAIL);
|
||||
}
|
||||
|
||||
if !still_joined {
|
||||
let _ = iface
|
||||
.smol_iface()
|
||||
.lock()
|
||||
.leave_multicast_group(IpAddress::Ipv4(multi_ipv4));
|
||||
}
|
||||
iface.common().ipv4_multicast_leave_ref(multi_ipv4);
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(SystemError::ENOPROTOOPT),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use alloc::vec::Vec;
|
||||
use core::sync::atomic::{AtomicU16, Ordering};
|
||||
use hashbrown::HashMap;
|
||||
use smoltcp::wire::IpAddress;
|
||||
use system_error::SystemError;
|
||||
|
|
@ -30,33 +31,45 @@ impl Default for PortManager {
|
|||
}
|
||||
}
|
||||
|
||||
pub const DEFAULT_LOCAL_PORT_RANGE: u32 = (32768u32 << 16) | 60999u32;
|
||||
|
||||
impl PortManager {
|
||||
pub fn local_port_range() -> (u16, u16) {
|
||||
ProcessManager::current_netns().local_port_range()
|
||||
}
|
||||
|
||||
pub fn set_local_port_range(min: u16, max: u16) -> Result<(), SystemError> {
|
||||
ProcessManager::current_netns().set_local_port_range(min, max)
|
||||
}
|
||||
|
||||
/// @brief 自动分配一个相对应协议中未被使用的PORT,如果动态端口均已被占用,返回错误码 EADDRINUSE
|
||||
pub fn get_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
|
||||
// TODO: selects non-conflict high port
|
||||
static EPHEMERAL_PORT: core::sync::atomic::AtomicU16 =
|
||||
core::sync::atomic::AtomicU16::new(0);
|
||||
let initial = (49152 + rand() % (65536 - 49152)) as u16;
|
||||
let _ = EPHEMERAL_PORT.compare_exchange(
|
||||
0,
|
||||
initial,
|
||||
core::sync::atomic::Ordering::AcqRel,
|
||||
core::sync::atomic::Ordering::Relaxed,
|
||||
);
|
||||
static EPHEMERAL_PORT: AtomicU16 = AtomicU16::new(0);
|
||||
let (min, max) = Self::local_port_range();
|
||||
let range = (max - min) as u32 + 1;
|
||||
if range == 0 {
|
||||
return Err(SystemError::EINVAL);
|
||||
}
|
||||
let current = EPHEMERAL_PORT.load(Ordering::Relaxed);
|
||||
if current < min || current > max {
|
||||
let initial = min + (rand() % range as usize) as u16;
|
||||
EPHEMERAL_PORT.store(initial, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
let mut remaining = 65536 - 49152; // 剩余尝试分配端口次数
|
||||
let mut remaining = range;
|
||||
while remaining > 0 {
|
||||
let old = EPHEMERAL_PORT
|
||||
.fetch_update(
|
||||
core::sync::atomic::Ordering::AcqRel,
|
||||
core::sync::atomic::Ordering::Relaxed,
|
||||
|cur| Some(if cur == 65535 { 49152 } else { cur + 1 }),
|
||||
)
|
||||
.fetch_update(Ordering::AcqRel, Ordering::Relaxed, |cur| {
|
||||
let cur = if cur < min || cur > max { min } else { cur };
|
||||
Some(if cur >= max { min } else { cur + 1 })
|
||||
})
|
||||
.unwrap_or_else(|cur| cur);
|
||||
if old == 0 {
|
||||
continue;
|
||||
}
|
||||
let port = if old == 65535 { 49152 } else { old + 1 };
|
||||
let port = if old < min || old >= max {
|
||||
min
|
||||
} else {
|
||||
old + 1
|
||||
};
|
||||
|
||||
// 使用 ListenTable 检查端口是否被占用
|
||||
match socket_type {
|
||||
|
|
@ -83,9 +96,25 @@ impl PortManager {
|
|||
|
||||
#[inline]
|
||||
pub fn bind_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
|
||||
let port = self.get_ephemeral_port(socket_type)?;
|
||||
self.bind_port(socket_type, port)?;
|
||||
return Ok(port);
|
||||
let (min, max) = Self::local_port_range();
|
||||
let range = (max - min) as u32 + 1;
|
||||
if range == 0 {
|
||||
return Err(SystemError::EINVAL);
|
||||
}
|
||||
let mut remaining = range;
|
||||
while remaining > 0 {
|
||||
let port = self.get_ephemeral_port(socket_type)?;
|
||||
match self.bind_port(socket_type, port) {
|
||||
Ok(()) => return Ok(port),
|
||||
Err(SystemError::EADDRINUSE) => {
|
||||
// Race: another thread grabbed the port after we checked.
|
||||
remaining -= 1;
|
||||
continue;
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
Err(SystemError::EADDRINUSE)
|
||||
}
|
||||
|
||||
/// UDP: 绑定随机端口(支持 reuseaddr/reuseport 规则)
|
||||
|
|
@ -96,9 +125,25 @@ impl PortManager {
|
|||
reuseport: bool,
|
||||
bind_id: usize,
|
||||
) -> Result<u16, SystemError> {
|
||||
let port = self.get_ephemeral_port(Types::Udp)?;
|
||||
self.bind_udp_port(port, addr, reuseaddr, reuseport, bind_id)?;
|
||||
Ok(port)
|
||||
let (min, max) = Self::local_port_range();
|
||||
let range = (max - min) as u32 + 1;
|
||||
if range == 0 {
|
||||
return Err(SystemError::EINVAL);
|
||||
}
|
||||
let mut remaining = range;
|
||||
while remaining > 0 {
|
||||
let port = self.get_ephemeral_port(Types::Udp)?;
|
||||
match self.bind_udp_port(port, addr, reuseaddr, reuseport, bind_id) {
|
||||
Ok(()) => return Ok(port),
|
||||
Err(SystemError::EADDRINUSE) => {
|
||||
// Race: another thread grabbed the port after we checked.
|
||||
remaining -= 1;
|
||||
continue;
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
Err(SystemError::EADDRINUSE)
|
||||
}
|
||||
|
||||
/// @brief 检测给定端口是否已被占用,如果未被占用则在 TCP 对应的表中记录
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ use crate::net::socket::unix::utils::CmsgBuffer;
|
|||
use crate::net::socket::{AddressFamily, Socket, PMSG, PSO, PSOL};
|
||||
use crate::net::socket::{IpOption, PIPV6};
|
||||
use crate::process::namespace::net_namespace::NetNamespace;
|
||||
use crate::process::namespace::NamespaceOps;
|
||||
use crate::process::ProcessManager;
|
||||
use crate::{libs::rwsem::RwSem, net::socket::endpoint::Endpoint};
|
||||
use alloc::collections::VecDeque;
|
||||
|
|
@ -110,6 +111,8 @@ pub struct UdpSocket {
|
|||
ip_multicast_ttl: AtomicI32,
|
||||
/// IP_MULTICAST_LOOP
|
||||
ip_multicast_loop: AtomicBool,
|
||||
/// IP_MULTICAST_ALL
|
||||
ip_multicast_all: AtomicBool,
|
||||
/// IP_MULTICAST_IF: interface index
|
||||
ip_multicast_ifindex: AtomicI32,
|
||||
/// IP_MULTICAST_IF: interface address (network byte order)
|
||||
|
|
@ -190,6 +193,7 @@ impl UdpSocket {
|
|||
recv_err_v6: AtomicBool::new(false),
|
||||
ip_multicast_ttl: AtomicI32::new(1),
|
||||
ip_multicast_loop: AtomicBool::new(true),
|
||||
ip_multicast_all: AtomicBool::new(true),
|
||||
ip_multicast_ifindex: AtomicI32::new(0),
|
||||
ip_multicast_addr: AtomicU32::new(0),
|
||||
ip_multicast_groups: Mutex::new(Vec::new()),
|
||||
|
|
@ -940,9 +944,11 @@ impl UdpSocket {
|
|||
let octets = addr.octets();
|
||||
let multiaddr = u32::from_ne_bytes(octets);
|
||||
let ifindex = mcast_ifindex.max(ifindex);
|
||||
if multicast_loopback::multicast_registry()
|
||||
.has_membership(multiaddr, ifindex)
|
||||
{
|
||||
if multicast_loopback::multicast_registry().has_membership(
|
||||
self.netns.ns_common().nsid.data(),
|
||||
multiaddr,
|
||||
ifindex,
|
||||
) {
|
||||
udp_bindings::deliver_multicast_all(
|
||||
&self.netns,
|
||||
dest,
|
||||
|
|
@ -1045,9 +1051,11 @@ impl UdpSocket {
|
|||
let multiaddr = u32::from_ne_bytes(octets);
|
||||
let ifindex = self.get_multicast_ifindex();
|
||||
|
||||
if multicast_loopback::multicast_registry()
|
||||
.has_membership(multiaddr, ifindex)
|
||||
{
|
||||
if multicast_loopback::multicast_registry().has_membership(
|
||||
self.netns.ns_common().nsid.data(),
|
||||
multiaddr,
|
||||
ifindex,
|
||||
) {
|
||||
udp_bindings::deliver_multicast_all(
|
||||
&self.netns,
|
||||
dest,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use alloc::sync::Weak;
|
|||
use alloc::vec::Vec;
|
||||
|
||||
use crate::libs::rwsem::RwSem;
|
||||
use crate::process::namespace::NamespaceOps;
|
||||
|
||||
use super::UdpSocket;
|
||||
|
||||
|
|
@ -16,6 +17,8 @@ use super::UdpSocket;
|
|||
struct MulticastMember {
|
||||
/// Weak reference to the socket
|
||||
socket: Weak<UdpSocket>,
|
||||
/// Network namespace id
|
||||
netns_id: usize,
|
||||
/// Multicast group address (in network byte order for IPv4)
|
||||
multiaddr: u32,
|
||||
/// Interface index where the group was joined
|
||||
|
|
@ -36,14 +39,22 @@ impl MulticastLoopbackRegistry {
|
|||
|
||||
/// Register a socket as a member of a multicast group
|
||||
pub fn register(&self, socket: Weak<UdpSocket>, multiaddr: u32, ifindex: i32) {
|
||||
let netns_id = match socket.upgrade() {
|
||||
Some(sock) => sock.netns().ns_common().nsid.data(),
|
||||
None => return,
|
||||
};
|
||||
let mut members = self.members.write();
|
||||
// Check if already registered
|
||||
let exists = members.iter().any(|m| {
|
||||
m.multiaddr == multiaddr && m.ifindex == ifindex && m.socket.as_ptr() == socket.as_ptr()
|
||||
m.multiaddr == multiaddr
|
||||
&& m.ifindex == ifindex
|
||||
&& m.netns_id == netns_id
|
||||
&& m.socket.as_ptr() == socket.as_ptr()
|
||||
});
|
||||
if !exists {
|
||||
members.push(MulticastMember {
|
||||
socket,
|
||||
netns_id,
|
||||
multiaddr,
|
||||
ifindex,
|
||||
});
|
||||
|
|
@ -66,11 +77,14 @@ impl MulticastLoopbackRegistry {
|
|||
members.retain(|m| m.socket.as_ptr() != socket.as_ptr());
|
||||
}
|
||||
|
||||
pub fn has_membership(&self, multiaddr: u32, ifindex: i32) -> bool {
|
||||
pub fn has_membership(&self, netns_id: usize, multiaddr: u32, ifindex: i32) -> bool {
|
||||
let members = self.members.read();
|
||||
members
|
||||
.iter()
|
||||
.any(|m| m.multiaddr == multiaddr && m.ifindex == ifindex)
|
||||
members.iter().any(|m| {
|
||||
m.netns_id == netns_id
|
||||
&& m.multiaddr == multiaddr
|
||||
&& m.ifindex == ifindex
|
||||
&& m.socket.strong_count() > 0
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -408,6 +408,18 @@ impl UdpSocket {
|
|||
self.ip_multicast_loop.store(on, Ordering::Relaxed);
|
||||
Ok(())
|
||||
}
|
||||
IpOption::MULTICAST_ALL => {
|
||||
let v = if val.len() == 1 {
|
||||
val[0] as i32
|
||||
} else if val.len() >= core::mem::size_of::<i32>() {
|
||||
i32::from_ne_bytes([val[0], val[1], val[2], val[3]])
|
||||
} else {
|
||||
return Err(SystemError::EINVAL);
|
||||
};
|
||||
let on = v != 0;
|
||||
self.ip_multicast_all.store(on, Ordering::Relaxed);
|
||||
Ok(())
|
||||
}
|
||||
IpOption::MULTICAST_IF => apply_ipv4_multicast_if(
|
||||
&self.netns,
|
||||
val,
|
||||
|
|
@ -512,6 +524,13 @@ impl UdpSocket {
|
|||
0i32
|
||||
}
|
||||
}
|
||||
IpOption::MULTICAST_ALL => {
|
||||
if self.ip_multicast_all.load(Ordering::Relaxed) {
|
||||
1i32
|
||||
} else {
|
||||
0i32
|
||||
}
|
||||
}
|
||||
IpOption::MULTICAST_IF => self.ip_multicast_addr.load(Ordering::Relaxed) as i32,
|
||||
IpOption::PKTINFO => {
|
||||
if self.recv_pktinfo_v4.load(Ordering::Relaxed) {
|
||||
|
|
|
|||
|
|
@ -109,9 +109,11 @@ pub fn deliver_multicast_all(
|
|||
};
|
||||
let mut delivered = 0;
|
||||
for cand in candidates {
|
||||
if !cand
|
||||
.socket
|
||||
.has_ipv4_multicast_membership(multiaddr, ifindex)
|
||||
let multicast_all = cand.socket.ip_multicast_all.load(Ordering::Relaxed);
|
||||
if !multicast_all
|
||||
&& !cand
|
||||
.socket
|
||||
.has_ipv4_multicast_membership(multiaddr, ifindex)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -327,7 +327,10 @@ impl Connecting {
|
|||
Inner::Connecting(self),
|
||||
Err(SystemError::EAGAIN_OR_EWOULDBLOCK),
|
||||
),
|
||||
ConnectResult::Connected => (Inner::Established(Established::new(self.inner)), Ok(())),
|
||||
ConnectResult::Connected => (
|
||||
Inner::Established(Established::new(self.inner, true)),
|
||||
Ok(()),
|
||||
),
|
||||
ConnectResult::Refused | ConnectResult::RefusedConsumed => {
|
||||
// unbind port
|
||||
self.inner
|
||||
|
|
@ -362,7 +365,7 @@ impl Connecting {
|
|||
/// that the underlying socket is actually in the ESTABLISHED state.
|
||||
/// The caller must ensure that the socket handshake has completed successfully.
|
||||
pub unsafe fn into_established(self) -> Established {
|
||||
Established::new(self.inner)
|
||||
Established::new(self.inner, true)
|
||||
}
|
||||
|
||||
/// Returns `true` when `conn_result` becomes ready, which indicates that the caller should
|
||||
|
|
@ -585,7 +588,7 @@ impl Listening {
|
|||
// TODO is smoltcp socket swappable?
|
||||
core::mem::swap(&mut new_listen, connected);
|
||||
|
||||
return Ok((Established::new(new_listen), remote_endpoint));
|
||||
return Ok((Established::new(new_listen, false), remote_endpoint));
|
||||
}
|
||||
|
||||
pub fn update_io_events(&self, pollee: &AtomicUsize) {
|
||||
|
|
@ -645,10 +648,11 @@ pub struct Established {
|
|||
inner: socket::inet::BoundInner,
|
||||
local: smoltcp::wire::IpEndpoint,
|
||||
peer: smoltcp::wire::IpEndpoint,
|
||||
owns_port: bool,
|
||||
}
|
||||
|
||||
impl Established {
|
||||
pub fn new(inner: socket::inet::BoundInner) -> Self {
|
||||
pub fn new(inner: socket::inet::BoundInner, owns_port: bool) -> Self {
|
||||
let local = inner
|
||||
.with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.local_endpoint())
|
||||
.unwrap_or(smoltcp::wire::IpEndpoint::new(
|
||||
|
|
@ -661,7 +665,12 @@ impl Established {
|
|||
smoltcp::wire::IpAddress::Ipv4(smoltcp::wire::Ipv4Address::UNSPECIFIED),
|
||||
0,
|
||||
));
|
||||
Self { inner, local, peer }
|
||||
Self {
|
||||
inner,
|
||||
local,
|
||||
peer,
|
||||
owns_port,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_mut<R, F: FnMut(&mut smoltcp::socket::tcp::Socket<'static>) -> R>(
|
||||
|
|
@ -683,6 +692,10 @@ impl Established {
|
|||
self.inner.handle()
|
||||
}
|
||||
|
||||
pub fn owns_port(&self) -> bool {
|
||||
self.owns_port
|
||||
}
|
||||
|
||||
pub fn close(&self) {
|
||||
self.inner
|
||||
.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.close());
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use crate::net::socket::common::ShutdownBit;
|
||||
use crate::net::socket::inet::InetSocket;
|
||||
use crate::net::socket::inet::Types;
|
||||
use alloc::sync::Arc;
|
||||
use system_error::SystemError;
|
||||
|
||||
|
|
@ -457,6 +458,9 @@ impl TcpSocket {
|
|||
let iface = conn.iface().clone();
|
||||
let me: alloc::sync::Weak<dyn InetSocket> = self.self_ref.clone();
|
||||
conn.close();
|
||||
if conn.owns_port() {
|
||||
iface.port_manager().unbind_port(Types::Tcp, local_port);
|
||||
}
|
||||
iface.common().defer_tcp_close(handle, local_port, me);
|
||||
writer.replace(inner::Inner::Established(conn));
|
||||
}
|
||||
|
|
@ -481,6 +485,9 @@ impl TcpSocket {
|
|||
} else {
|
||||
es.close();
|
||||
}
|
||||
if es.owns_port() {
|
||||
iface.port_manager().unbind_port(Types::Tcp, local_port);
|
||||
}
|
||||
iface.common().defer_tcp_close(handle, local_port, me);
|
||||
writer.replace(inner::Inner::Established(es));
|
||||
}
|
||||
|
|
@ -490,7 +497,10 @@ impl TcpSocket {
|
|||
smoltcp::wire::IpAddress::Ipv6(_) => smoltcp::wire::IpVersion::Ipv6,
|
||||
_ => smoltcp::wire::IpVersion::Ipv4,
|
||||
};
|
||||
let port = sc.get_name().port;
|
||||
let iface = sc.iface().clone();
|
||||
sc.release();
|
||||
iface.port_manager().unbind_port(Types::Tcp, port);
|
||||
writer.replace(inner::Inner::Closed(inner::Closed::new(ver)));
|
||||
}
|
||||
inner::Inner::Listening(mut ls) => {
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ pub enum IpOption {
|
|||
MULTICAST_LOOP = 34,
|
||||
ADD_MEMBERSHIP = 35,
|
||||
DROP_MEMBERSHIP = 36,
|
||||
MULTICAST_ALL = 49,
|
||||
RECVERR_RFC4884 = 26,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,9 +13,7 @@ use alloc::sync::Weak;
|
|||
use alloc::vec::Vec;
|
||||
|
||||
use crate::libs::mutex::Mutex;
|
||||
use crate::net::socket::inet::common::PortManager;
|
||||
use crate::net::socket::inet::InetSocket;
|
||||
use crate::net::socket::inet::Types;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ClosingTcpSocket {
|
||||
|
|
@ -62,11 +60,7 @@ impl TcpCloseDefer {
|
|||
///
|
||||
/// 重要:
|
||||
/// - 锁顺序必须保持为:`SocketSet` -> `TcpCloseDefer::closing`,避免与 close 路径反转。
|
||||
pub fn reap_closed(
|
||||
&self,
|
||||
sockets: &mut smoltcp::iface::SocketSet<'static>,
|
||||
port_manager: &PortManager,
|
||||
) {
|
||||
pub fn reap_closed(&self, sockets: &mut smoltcp::iface::SocketSet<'static>) {
|
||||
let mut closing = self.closing.lock();
|
||||
if closing.is_empty() {
|
||||
return;
|
||||
|
|
@ -75,7 +69,7 @@ impl TcpCloseDefer {
|
|||
while i < closing.len() {
|
||||
let ClosingTcpSocket {
|
||||
handle,
|
||||
local_port,
|
||||
local_port: _local_port,
|
||||
ref sock,
|
||||
} = closing[i];
|
||||
let state = sockets.get::<smoltcp::socket::tcp::Socket>(handle).state();
|
||||
|
|
@ -88,7 +82,6 @@ impl TcpCloseDefer {
|
|||
continue;
|
||||
}
|
||||
sockets.remove(handle);
|
||||
port_manager.unbind_port(Types::Tcp, local_port);
|
||||
closing.swap_remove(i);
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ use alloc::boxed::Box;
|
|||
use alloc::collections::BTreeMap;
|
||||
use alloc::string::{String, ToString};
|
||||
use alloc::sync::{Arc, Weak};
|
||||
use core::sync::atomic::AtomicU32;
|
||||
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use hashbrown::HashMap;
|
||||
use system_error::SystemError;
|
||||
|
|
@ -90,6 +91,8 @@ pub struct NetNamespace {
|
|||
|
||||
/// AF_UNIX abstract namespace table (scoped to this netns).
|
||||
unix_abstract_table: Arc<UnixAbstractTable>,
|
||||
/// Per-netns IPv4 ephemeral port range (ip_local_port_range)
|
||||
local_port_range: AtomicU32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -136,6 +139,9 @@ impl NetNamespace {
|
|||
netlink_socket_table: NetlinkSocketTable::default(),
|
||||
netlink_kernel_socket: RwLock::new(generate_supported_netlink_kernel_sockets()),
|
||||
unix_abstract_table: unix_abstract_table.clone(),
|
||||
local_port_range: AtomicU32::new(
|
||||
crate::net::socket::inet::common::port::DEFAULT_LOCAL_PORT_RANGE,
|
||||
),
|
||||
});
|
||||
|
||||
// Self::create_polling_thread(netns.clone(), "netns_root".to_string());
|
||||
|
|
@ -168,6 +174,9 @@ impl NetNamespace {
|
|||
netlink_socket_table: NetlinkSocketTable::default(),
|
||||
netlink_kernel_socket: RwLock::new(generate_supported_netlink_kernel_sockets()),
|
||||
unix_abstract_table: unix_abstract_table.clone(),
|
||||
local_port_range: AtomicU32::new(
|
||||
crate::net::socket::inet::common::port::DEFAULT_LOCAL_PORT_RANGE,
|
||||
),
|
||||
});
|
||||
|
||||
// Linux 语义:每个 netns 都需要一个可被唤醒的轮询线程来推进协议栈。
|
||||
|
|
@ -199,6 +208,32 @@ impl NetNamespace {
|
|||
self.device_list.read()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn local_port_range(&self) -> (u16, u16) {
|
||||
let value = self.local_port_range.load(Ordering::Relaxed);
|
||||
((value >> 16) as u16, (value & 0xffff) as u16)
|
||||
}
|
||||
|
||||
pub fn set_local_port_range(&self, min: u16, max: u16) -> Result<(), SystemError> {
|
||||
if min == 0 || max == 0 || min > max {
|
||||
return Err(SystemError::EINVAL);
|
||||
}
|
||||
let new_value = ((min as u32) << 16) | (max as u32);
|
||||
loop {
|
||||
let old_value = self.local_port_range.load(Ordering::Relaxed);
|
||||
if old_value == new_value {
|
||||
return Ok(());
|
||||
}
|
||||
if self
|
||||
.local_port_range
|
||||
.compare_exchange(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
|
||||
.is_ok()
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn inner(&self) -> RwLockReadGuard<'_, InnerNetNamespace> {
|
||||
self.inner.read()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -583,7 +583,7 @@ impl Rseq {
|
|||
|
||||
// 更新 cpu_id 等字段
|
||||
let cpu_id = current_cpu_id().data() as u32;
|
||||
if let Err(e) = unsafe { access.update_cpu_node_id(cpu_id, 0, 0) } {
|
||||
if let Err(_e) = unsafe { access.update_cpu_node_id(cpu_id, 0, 0) } {
|
||||
// log::debug!("rseq update_cpu_node_id failed: {:?}", e);
|
||||
Self::disable_current_rseq_after_fault(&pcb);
|
||||
let _ = crate::ipc::signal::send_kernel_signal_to_current(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,127 @@
|
|||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static const char *kRangeFile = "/proc/sys/net/ipv4/ip_local_port_range";
|
||||
|
||||
static int read_range(int *min, int *max) {
|
||||
int fd = open(kRangeFile, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
perror("open range file");
|
||||
return -1;
|
||||
}
|
||||
char buf[64];
|
||||
ssize_t n = read(fd, buf, sizeof(buf) - 1);
|
||||
close(fd);
|
||||
if (n <= 0) {
|
||||
perror("read range file");
|
||||
return -1;
|
||||
}
|
||||
buf[n] = '\0';
|
||||
if (sscanf(buf, "%d %d", min, max) != 2) {
|
||||
fprintf(stderr, "failed to parse range: '%s'\n", buf);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int write_range(int min, int max) {
|
||||
int fd = open(kRangeFile, O_WRONLY | O_TRUNC);
|
||||
if (fd < 0) {
|
||||
perror("open range file for write");
|
||||
return -1;
|
||||
}
|
||||
char buf[64];
|
||||
int len = snprintf(buf, sizeof(buf), "%d %d", min, max);
|
||||
if (len <= 0) {
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
ssize_t n = write(fd, buf, (size_t)len);
|
||||
close(fd);
|
||||
if (n != len) {
|
||||
perror("write range file");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(void) {
|
||||
int parent_min = 0, parent_max = 0;
|
||||
if (read_range(&parent_min, &parent_max) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (access(kRangeFile, W_OK) != 0) {
|
||||
printf("[SKIP] %s not writable\n", kRangeFile);
|
||||
return 0;
|
||||
}
|
||||
|
||||
pid_t pid = fork();
|
||||
if (pid < 0) {
|
||||
perror("fork");
|
||||
return 1;
|
||||
}
|
||||
if (pid == 0) {
|
||||
if (unshare(CLONE_NEWNET) != 0) {
|
||||
printf("[SKIP] unshare(CLONE_NEWNET) failed: %s\n", strerror(errno));
|
||||
return 0;
|
||||
}
|
||||
int child_min = 0, child_max = 0;
|
||||
if (read_range(&child_min, &child_max) != 0) {
|
||||
return 1;
|
||||
}
|
||||
int new_min = child_min;
|
||||
int new_max = child_min + 10;
|
||||
if (write_range(new_min, new_max) != 0) {
|
||||
return 1;
|
||||
}
|
||||
int verify_min = 0, verify_max = 0;
|
||||
if (read_range(&verify_min, &verify_max) != 0) {
|
||||
return 1;
|
||||
}
|
||||
if (verify_min != new_min || verify_max != new_max) {
|
||||
fprintf(stderr, "child range verify failed: %d %d (expected %d %d)\n",
|
||||
verify_min, verify_max, new_min, new_max);
|
||||
return 1;
|
||||
}
|
||||
// Restore original range in this netns to keep it tidy.
|
||||
if (write_range(child_min, child_max) != 0) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int status = 0;
|
||||
if (waitpid(pid, &status, 0) < 0) {
|
||||
perror("waitpid");
|
||||
return 1;
|
||||
}
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
|
||||
fprintf(stderr, "child failed\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int parent_min_after = 0, parent_max_after = 0;
|
||||
if (read_range(&parent_min_after, &parent_max_after) != 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (parent_min_after != parent_min || parent_max_after != parent_max) {
|
||||
fprintf(stderr,
|
||||
"parent range changed: %d %d (expected %d %d)\n",
|
||||
parent_min_after, parent_max_after, parent_min, parent_max);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("[PASS] ip_local_port_range is isolated per netns\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -197,6 +197,7 @@ dependencies = [
|
|||
"chrono",
|
||||
"clap",
|
||||
"env_logger",
|
||||
"libc",
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ regex = "1.0"
|
|||
chrono = { version = "0.4", features = ["serde"] }
|
||||
log = "0.4"
|
||||
env_logger = "0.10"
|
||||
libc = "0.2"
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ use std::{
|
|||
collections::HashSet,
|
||||
fs::{self, File},
|
||||
io::{BufRead, BufReader, Write},
|
||||
os::unix::process::CommandExt,
|
||||
path::{Path, PathBuf},
|
||||
process::Command,
|
||||
sync::{
|
||||
|
|
@ -369,6 +370,19 @@ impl TestRunner {
|
|||
if !blocked_subtests.is_empty() {
|
||||
cmd.arg(format!("--gtest_filter=-{}", blocked_subtests.join(":")));
|
||||
}
|
||||
// Run each test binary in a fresh network namespace to avoid sysctl leakage.
|
||||
let name_lc = test_name.to_ascii_lowercase();
|
||||
if name_lc.contains("socket") || name_lc.contains("net") {
|
||||
unsafe {
|
||||
cmd.pre_exec(|| {
|
||||
let ret = libc::unshare(libc::CLONE_NEWNET);
|
||||
if ret != 0 {
|
||||
return Err(std::io::Error::last_os_error());
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let status = cmd
|
||||
.current_dir(&self.config.tests_dir)
|
||||
|
|
|
|||
|
|
@ -105,6 +105,8 @@ socket_ip_tcp_loopback_non_blocking_test
|
|||
socket_ip_tcp_loopback_test
|
||||
socket_ip_tcp_udp_generic_loopback_test
|
||||
socket_ipv4_datagram_based_socket_unbound_loopback_test
|
||||
socket_ipv4_udp_unbound_loopback_test
|
||||
socket_ipv4_udp_unbound_loopback_nogotsan_test
|
||||
|
||||
# 信号处理测试
|
||||
sigaction_test
|
||||
|
|
|
|||
Loading…
Reference in New Issue