feat(procfs): add /proc/sys/net/ipv4/ip_local_port_range interface (#1749)

* feat(procfs): add /proc/sys/net/ipv4/ip_local_port_range interface

- Add net module to procfs sys directory with IPv4 support
- Implement ip_local_port_range file for reading/writing port range
configuration
- Update PortManager to support configurable local port range with atomic
operations
- Extend ephemeral port allocation to use configured range instead of hardcoded
values
- Add gvisor test cases for IPv4 UDP socket functionality

Signed-off-by: longjin <longjin@DragonOS.org>
This commit is contained in:
LoGin 2026-02-04 15:12:36 +08:00 committed by GitHub
parent 03c943ae77
commit 6e8e8c6411
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 555 additions and 76 deletions

View File

@ -246,6 +246,7 @@ pub struct IfaceCommon {
tcp_close_defer: crate::net::tcp_close_defer::TcpCloseDefer,
/// TCP listener/backlog 语义辅助Linux-like 丢 SYN 等)。
tcp_listener_backlog: crate::net::tcp_listener_backlog::TcpListenerBacklog,
ipv4_multicast_refcnt: Mutex<Vec<(smoltcp::wire::Ipv4Address, usize)>>,
}
impl fmt::Debug for IfaceCommon {
@ -284,6 +285,7 @@ impl IfaceCommon {
packet_sockets: RwLock::new(Vec::new()),
tcp_close_defer: crate::net::tcp_close_defer::TcpCloseDefer::new(),
tcp_listener_backlog: crate::net::tcp_listener_backlog::TcpListenerBacklog::new(),
ipv4_multicast_refcnt: Mutex::new(Vec::new()),
}
}
@ -298,6 +300,38 @@ impl IfaceCommon {
self.tcp_listener_backlog.unregister_tcp_listen_port(port);
}
pub fn ipv4_multicast_join_ref(
&self,
group: smoltcp::wire::Ipv4Address,
) -> Result<(), smoltcp::iface::MulticastError> {
let mut guard = self.ipv4_multicast_refcnt.lock();
if let Some((_, ref mut cnt)) = guard.iter_mut().find(|(g, _)| *g == group) {
*cnt = cnt.saturating_add(1);
return Ok(());
}
self.smol_iface
.lock()
.join_multicast_group(smoltcp::wire::IpAddress::Ipv4(group))?;
guard.push((group, 1));
Ok(())
}
pub fn ipv4_multicast_leave_ref(&self, group: smoltcp::wire::Ipv4Address) {
let mut guard = self.ipv4_multicast_refcnt.lock();
let Some(pos) = guard.iter().position(|(g, _)| *g == group) else {
return;
};
if guard[pos].1 > 1 {
guard[pos].1 -= 1;
return;
}
guard.swap_remove(pos);
let _ = self
.smol_iface
.lock()
.leave_multicast_group(smoltcp::wire::IpAddress::Ipv4(group));
}
/// 驱动收包入口使用的通用丢包策略(避免驱动理解 L4 语义)。
#[inline]
pub fn should_drop_rx_packet(&self, packet: &[u8]) -> bool {
@ -350,8 +384,7 @@ impl IfaceCommon {
// Reclaim TCP sockets that have fully closed.
// Lock order: sockets -> tcp_close_defer (matches close path, which may touch sockets then defer close).
self.tcp_close_defer
.reap_closed(&mut sockets, &self.port_manager);
self.tcp_close_defer.reap_closed(&mut sockets);
// drop sockets here to avoid deadlock
drop(interface);

View File

@ -3,6 +3,7 @@
//! 提供类似 Linux 的 /proc/sys 接口,支持动态配置内核参数
mod kernel;
mod net;
mod vm;
use crate::filesystem::{
@ -14,6 +15,7 @@ use alloc::{
sync::{Arc, Weak},
};
use kernel::KernelDirOps;
use net::NetDirOps;
use system_error::SystemError;
use vm::VmDirOps;
@ -58,6 +60,16 @@ impl DirOps for SysDirOps {
cached_children.insert(name.to_string(), inode.clone());
return Ok(inode);
}
if name == "net" {
let mut cached_children = dir.cached_children().write();
if let Some(child) = cached_children.get(name) {
return Ok(child.clone());
}
let inode = NetDirOps::new_inode(dir.self_ref_weak().clone());
cached_children.insert(name.to_string(), inode.clone());
return Ok(inode);
}
Err(SystemError::ENOENT)
}
@ -70,5 +82,8 @@ impl DirOps for SysDirOps {
cached_children
.entry("vm".to_string())
.or_insert_with(|| VmDirOps::new_inode(dir.self_ref_weak().clone()));
cached_children
.entry("net".to_string())
.or_insert_with(|| NetDirOps::new_inode(dir.self_ref_weak().clone()));
}
}

View File

@ -0,0 +1,108 @@
use crate::filesystem::{
procfs::{
template::{Builder, DirOps, FileOps, ProcDir, ProcDirBuilder, ProcFileBuilder},
utils::proc_read,
},
vfs::{FilePrivateData, IndexNode, InodeMode},
};
use crate::libs::mutex::MutexGuard;
use crate::net::socket::inet::common::port::PortManager;
use alloc::{
format,
string::{String, ToString},
sync::{Arc, Weak},
vec::Vec,
};
use system_error::SystemError;
#[derive(Debug)]
pub struct Ipv4DirOps;
impl Ipv4DirOps {
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
ProcDirBuilder::new(Self, InodeMode::from_bits_truncate(0o555))
.parent(parent)
.build()
.unwrap()
}
}
impl DirOps for Ipv4DirOps {
fn lookup_child(
&self,
dir: &ProcDir<Self>,
name: &str,
) -> Result<Arc<dyn IndexNode>, SystemError> {
if name == "ip_local_port_range" {
let mut cached_children = dir.cached_children().write();
if let Some(child) = cached_children.get(name) {
return Ok(child.clone());
}
let inode = IpLocalPortRangeFileOps::new_inode(dir.self_ref_weak().clone());
cached_children.insert(name.to_string(), inode.clone());
return Ok(inode);
}
Err(SystemError::ENOENT)
}
fn populate_children(&self, dir: &ProcDir<Self>) {
let mut cached_children = dir.cached_children().write();
cached_children
.entry("ip_local_port_range".to_string())
.or_insert_with(|| IpLocalPortRangeFileOps::new_inode(dir.self_ref_weak().clone()));
}
}
#[derive(Debug)]
pub struct IpLocalPortRangeFileOps;
impl IpLocalPortRangeFileOps {
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
ProcFileBuilder::new(Self, InodeMode::from_bits_truncate(0o644))
.parent(parent)
.build()
.unwrap()
}
fn read_config() -> String {
let (min, max) = PortManager::local_port_range();
format!("{} {}\n", min, max)
}
fn write_config(data: &[u8]) -> Result<usize, SystemError> {
let input = core::str::from_utf8(data).map_err(|_| SystemError::EINVAL)?;
let parts: Vec<&str> = input.split_whitespace().collect();
if parts.len() < 2 {
return Err(SystemError::EINVAL);
}
let min: u16 = parts[0].parse().map_err(|_| SystemError::EINVAL)?;
let max: u16 = parts[1].parse().map_err(|_| SystemError::EINVAL)?;
PortManager::set_local_port_range(min, max)?;
Ok(data.len())
}
}
impl FileOps for IpLocalPortRangeFileOps {
fn read_at(
&self,
offset: usize,
len: usize,
buf: &mut [u8],
_data: MutexGuard<FilePrivateData>,
) -> Result<usize, SystemError> {
let content = Self::read_config();
proc_read(offset, len, buf, content.as_bytes())
}
fn write_at(
&self,
_offset: usize,
_len: usize,
buf: &[u8],
_data: MutexGuard<FilePrivateData>,
) -> Result<usize, SystemError> {
Self::write_config(buf)
}
}

View File

@ -0,0 +1,52 @@
mod ipv4;
use crate::filesystem::{
procfs::template::{DirOps, ProcDir, ProcDirBuilder},
vfs::{IndexNode, InodeMode},
};
use alloc::string::ToString;
use alloc::sync::{Arc, Weak};
use ipv4::Ipv4DirOps;
use system_error::SystemError;
use crate::filesystem::procfs::Builder;
#[derive(Debug)]
pub struct NetDirOps;
impl NetDirOps {
pub fn new_inode(parent: Weak<dyn IndexNode>) -> Arc<dyn IndexNode> {
ProcDirBuilder::new(Self, InodeMode::from_bits_truncate(0o555))
.parent(parent)
.build()
.unwrap()
}
}
impl DirOps for NetDirOps {
fn lookup_child(
&self,
dir: &ProcDir<Self>,
name: &str,
) -> Result<Arc<dyn IndexNode>, SystemError> {
if name == "ipv4" {
let mut cached_children = dir.cached_children().write();
if let Some(child) = cached_children.get(name) {
return Ok(child.clone());
}
let inode = Ipv4DirOps::new_inode(dir.self_ref_weak().clone());
cached_children.insert(name.to_string(), inode.clone());
return Ok(inode);
}
Err(SystemError::ENOENT)
}
fn populate_children(&self, dir: &ProcDir<Self>) {
let mut cached_children = dir.cached_children().write();
cached_children
.entry("ipv4".to_string())
.or_insert_with(|| Ipv4DirOps::new_inode(dir.self_ref_weak().clone()));
}
}

View File

@ -111,10 +111,7 @@ pub fn drop_ipv4_memberships(
if let Some(iface) = find_iface_by_ifindex(netns, entry.ifindex) {
let bytes = entry.multiaddr.to_ne_bytes();
let multi = Ipv4Address::new(bytes[0], bytes[1], bytes[2], bytes[3]);
let _ = iface
.smol_iface()
.lock()
.leave_multicast_group(IpAddress::Ipv4(multi));
iface.common().ipv4_multicast_leave_ref(multi);
}
}
}
@ -247,10 +244,7 @@ pub fn apply_ipv4_membership(
});
}
let join_result = {
let mut smol_iface = iface.smol_iface().lock();
smol_iface.join_multicast_group(IpAddress::Ipv4(multi_ipv4))
};
let join_result = iface.common().ipv4_multicast_join_ref(multi_ipv4);
if let Err(e) = join_result {
{
let mut groups = groups.lock();
@ -265,7 +259,7 @@ pub fn apply_ipv4_membership(
Ok(())
}
IpOption::DROP_MEMBERSHIP => {
let (did_remove, still_joined) = {
let did_remove = {
let mut groups = groups.lock();
let pos = groups.iter().position(|g| {
if g.multiaddr != multi {
@ -281,12 +275,9 @@ pub fn apply_ipv4_membership(
});
if let Some(idx) = pos {
groups.swap_remove(idx);
let still_joined = groups
.iter()
.any(|g| g.multiaddr == multi && g.ifindex == resolved_ifindex);
(true, still_joined)
true
} else {
(false, false)
false
}
};
@ -294,12 +285,7 @@ pub fn apply_ipv4_membership(
return Err(SystemError::EADDRNOTAVAIL);
}
if !still_joined {
let _ = iface
.smol_iface()
.lock()
.leave_multicast_group(IpAddress::Ipv4(multi_ipv4));
}
iface.common().ipv4_multicast_leave_ref(multi_ipv4);
Ok(())
}
_ => Err(SystemError::ENOPROTOOPT),

View File

@ -1,4 +1,5 @@
use alloc::vec::Vec;
use core::sync::atomic::{AtomicU16, Ordering};
use hashbrown::HashMap;
use smoltcp::wire::IpAddress;
use system_error::SystemError;
@ -30,33 +31,45 @@ impl Default for PortManager {
}
}
pub const DEFAULT_LOCAL_PORT_RANGE: u32 = (32768u32 << 16) | 60999u32;
impl PortManager {
pub fn local_port_range() -> (u16, u16) {
ProcessManager::current_netns().local_port_range()
}
pub fn set_local_port_range(min: u16, max: u16) -> Result<(), SystemError> {
ProcessManager::current_netns().set_local_port_range(min, max)
}
/// @brief 自动分配一个相对应协议中未被使用的PORT如果动态端口均已被占用返回错误码 EADDRINUSE
pub fn get_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
// TODO: selects non-conflict high port
static EPHEMERAL_PORT: core::sync::atomic::AtomicU16 =
core::sync::atomic::AtomicU16::new(0);
let initial = (49152 + rand() % (65536 - 49152)) as u16;
let _ = EPHEMERAL_PORT.compare_exchange(
0,
initial,
core::sync::atomic::Ordering::AcqRel,
core::sync::atomic::Ordering::Relaxed,
);
static EPHEMERAL_PORT: AtomicU16 = AtomicU16::new(0);
let (min, max) = Self::local_port_range();
let range = (max - min) as u32 + 1;
if range == 0 {
return Err(SystemError::EINVAL);
}
let current = EPHEMERAL_PORT.load(Ordering::Relaxed);
if current < min || current > max {
let initial = min + (rand() % range as usize) as u16;
EPHEMERAL_PORT.store(initial, Ordering::Relaxed);
}
let mut remaining = 65536 - 49152; // 剩余尝试分配端口次数
let mut remaining = range;
while remaining > 0 {
let old = EPHEMERAL_PORT
.fetch_update(
core::sync::atomic::Ordering::AcqRel,
core::sync::atomic::Ordering::Relaxed,
|cur| Some(if cur == 65535 { 49152 } else { cur + 1 }),
)
.fetch_update(Ordering::AcqRel, Ordering::Relaxed, |cur| {
let cur = if cur < min || cur > max { min } else { cur };
Some(if cur >= max { min } else { cur + 1 })
})
.unwrap_or_else(|cur| cur);
if old == 0 {
continue;
}
let port = if old == 65535 { 49152 } else { old + 1 };
let port = if old < min || old >= max {
min
} else {
old + 1
};
// 使用 ListenTable 检查端口是否被占用
match socket_type {
@ -83,9 +96,25 @@ impl PortManager {
#[inline]
pub fn bind_ephemeral_port(&self, socket_type: Types) -> Result<u16, SystemError> {
let port = self.get_ephemeral_port(socket_type)?;
self.bind_port(socket_type, port)?;
return Ok(port);
let (min, max) = Self::local_port_range();
let range = (max - min) as u32 + 1;
if range == 0 {
return Err(SystemError::EINVAL);
}
let mut remaining = range;
while remaining > 0 {
let port = self.get_ephemeral_port(socket_type)?;
match self.bind_port(socket_type, port) {
Ok(()) => return Ok(port),
Err(SystemError::EADDRINUSE) => {
// Race: another thread grabbed the port after we checked.
remaining -= 1;
continue;
}
Err(e) => return Err(e),
}
}
Err(SystemError::EADDRINUSE)
}
/// UDP: 绑定随机端口(支持 reuseaddr/reuseport 规则)
@ -96,9 +125,25 @@ impl PortManager {
reuseport: bool,
bind_id: usize,
) -> Result<u16, SystemError> {
let port = self.get_ephemeral_port(Types::Udp)?;
self.bind_udp_port(port, addr, reuseaddr, reuseport, bind_id)?;
Ok(port)
let (min, max) = Self::local_port_range();
let range = (max - min) as u32 + 1;
if range == 0 {
return Err(SystemError::EINVAL);
}
let mut remaining = range;
while remaining > 0 {
let port = self.get_ephemeral_port(Types::Udp)?;
match self.bind_udp_port(port, addr, reuseaddr, reuseport, bind_id) {
Ok(()) => return Ok(port),
Err(SystemError::EADDRINUSE) => {
// Race: another thread grabbed the port after we checked.
remaining -= 1;
continue;
}
Err(e) => return Err(e),
}
}
Err(SystemError::EADDRINUSE)
}
/// @brief 检测给定端口是否已被占用,如果未被占用则在 TCP 对应的表中记录

View File

@ -15,6 +15,7 @@ use crate::net::socket::unix::utils::CmsgBuffer;
use crate::net::socket::{AddressFamily, Socket, PMSG, PSO, PSOL};
use crate::net::socket::{IpOption, PIPV6};
use crate::process::namespace::net_namespace::NetNamespace;
use crate::process::namespace::NamespaceOps;
use crate::process::ProcessManager;
use crate::{libs::rwsem::RwSem, net::socket::endpoint::Endpoint};
use alloc::collections::VecDeque;
@ -110,6 +111,8 @@ pub struct UdpSocket {
ip_multicast_ttl: AtomicI32,
/// IP_MULTICAST_LOOP
ip_multicast_loop: AtomicBool,
/// IP_MULTICAST_ALL
ip_multicast_all: AtomicBool,
/// IP_MULTICAST_IF: interface index
ip_multicast_ifindex: AtomicI32,
/// IP_MULTICAST_IF: interface address (network byte order)
@ -190,6 +193,7 @@ impl UdpSocket {
recv_err_v6: AtomicBool::new(false),
ip_multicast_ttl: AtomicI32::new(1),
ip_multicast_loop: AtomicBool::new(true),
ip_multicast_all: AtomicBool::new(true),
ip_multicast_ifindex: AtomicI32::new(0),
ip_multicast_addr: AtomicU32::new(0),
ip_multicast_groups: Mutex::new(Vec::new()),
@ -940,9 +944,11 @@ impl UdpSocket {
let octets = addr.octets();
let multiaddr = u32::from_ne_bytes(octets);
let ifindex = mcast_ifindex.max(ifindex);
if multicast_loopback::multicast_registry()
.has_membership(multiaddr, ifindex)
{
if multicast_loopback::multicast_registry().has_membership(
self.netns.ns_common().nsid.data(),
multiaddr,
ifindex,
) {
udp_bindings::deliver_multicast_all(
&self.netns,
dest,
@ -1045,9 +1051,11 @@ impl UdpSocket {
let multiaddr = u32::from_ne_bytes(octets);
let ifindex = self.get_multicast_ifindex();
if multicast_loopback::multicast_registry()
.has_membership(multiaddr, ifindex)
{
if multicast_loopback::multicast_registry().has_membership(
self.netns.ns_common().nsid.data(),
multiaddr,
ifindex,
) {
udp_bindings::deliver_multicast_all(
&self.netns,
dest,

View File

@ -8,6 +8,7 @@ use alloc::sync::Weak;
use alloc::vec::Vec;
use crate::libs::rwsem::RwSem;
use crate::process::namespace::NamespaceOps;
use super::UdpSocket;
@ -16,6 +17,8 @@ use super::UdpSocket;
struct MulticastMember {
/// Weak reference to the socket
socket: Weak<UdpSocket>,
/// Network namespace id
netns_id: usize,
/// Multicast group address (in network byte order for IPv4)
multiaddr: u32,
/// Interface index where the group was joined
@ -36,14 +39,22 @@ impl MulticastLoopbackRegistry {
/// Register a socket as a member of a multicast group
pub fn register(&self, socket: Weak<UdpSocket>, multiaddr: u32, ifindex: i32) {
let netns_id = match socket.upgrade() {
Some(sock) => sock.netns().ns_common().nsid.data(),
None => return,
};
let mut members = self.members.write();
// Check if already registered
let exists = members.iter().any(|m| {
m.multiaddr == multiaddr && m.ifindex == ifindex && m.socket.as_ptr() == socket.as_ptr()
m.multiaddr == multiaddr
&& m.ifindex == ifindex
&& m.netns_id == netns_id
&& m.socket.as_ptr() == socket.as_ptr()
});
if !exists {
members.push(MulticastMember {
socket,
netns_id,
multiaddr,
ifindex,
});
@ -66,11 +77,14 @@ impl MulticastLoopbackRegistry {
members.retain(|m| m.socket.as_ptr() != socket.as_ptr());
}
pub fn has_membership(&self, multiaddr: u32, ifindex: i32) -> bool {
pub fn has_membership(&self, netns_id: usize, multiaddr: u32, ifindex: i32) -> bool {
let members = self.members.read();
members
.iter()
.any(|m| m.multiaddr == multiaddr && m.ifindex == ifindex)
members.iter().any(|m| {
m.netns_id == netns_id
&& m.multiaddr == multiaddr
&& m.ifindex == ifindex
&& m.socket.strong_count() > 0
})
}
}

View File

@ -408,6 +408,18 @@ impl UdpSocket {
self.ip_multicast_loop.store(on, Ordering::Relaxed);
Ok(())
}
IpOption::MULTICAST_ALL => {
let v = if val.len() == 1 {
val[0] as i32
} else if val.len() >= core::mem::size_of::<i32>() {
i32::from_ne_bytes([val[0], val[1], val[2], val[3]])
} else {
return Err(SystemError::EINVAL);
};
let on = v != 0;
self.ip_multicast_all.store(on, Ordering::Relaxed);
Ok(())
}
IpOption::MULTICAST_IF => apply_ipv4_multicast_if(
&self.netns,
val,
@ -512,6 +524,13 @@ impl UdpSocket {
0i32
}
}
IpOption::MULTICAST_ALL => {
if self.ip_multicast_all.load(Ordering::Relaxed) {
1i32
} else {
0i32
}
}
IpOption::MULTICAST_IF => self.ip_multicast_addr.load(Ordering::Relaxed) as i32,
IpOption::PKTINFO => {
if self.recv_pktinfo_v4.load(Ordering::Relaxed) {

View File

@ -109,9 +109,11 @@ pub fn deliver_multicast_all(
};
let mut delivered = 0;
for cand in candidates {
if !cand
.socket
.has_ipv4_multicast_membership(multiaddr, ifindex)
let multicast_all = cand.socket.ip_multicast_all.load(Ordering::Relaxed);
if !multicast_all
&& !cand
.socket
.has_ipv4_multicast_membership(multiaddr, ifindex)
{
continue;
}

View File

@ -327,7 +327,10 @@ impl Connecting {
Inner::Connecting(self),
Err(SystemError::EAGAIN_OR_EWOULDBLOCK),
),
ConnectResult::Connected => (Inner::Established(Established::new(self.inner)), Ok(())),
ConnectResult::Connected => (
Inner::Established(Established::new(self.inner, true)),
Ok(()),
),
ConnectResult::Refused | ConnectResult::RefusedConsumed => {
// unbind port
self.inner
@ -362,7 +365,7 @@ impl Connecting {
/// that the underlying socket is actually in the ESTABLISHED state.
/// The caller must ensure that the socket handshake has completed successfully.
pub unsafe fn into_established(self) -> Established {
Established::new(self.inner)
Established::new(self.inner, true)
}
/// Returns `true` when `conn_result` becomes ready, which indicates that the caller should
@ -585,7 +588,7 @@ impl Listening {
// TODO is smoltcp socket swappable?
core::mem::swap(&mut new_listen, connected);
return Ok((Established::new(new_listen), remote_endpoint));
return Ok((Established::new(new_listen, false), remote_endpoint));
}
pub fn update_io_events(&self, pollee: &AtomicUsize) {
@ -645,10 +648,11 @@ pub struct Established {
inner: socket::inet::BoundInner,
local: smoltcp::wire::IpEndpoint,
peer: smoltcp::wire::IpEndpoint,
owns_port: bool,
}
impl Established {
pub fn new(inner: socket::inet::BoundInner) -> Self {
pub fn new(inner: socket::inet::BoundInner, owns_port: bool) -> Self {
let local = inner
.with::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.local_endpoint())
.unwrap_or(smoltcp::wire::IpEndpoint::new(
@ -661,7 +665,12 @@ impl Established {
smoltcp::wire::IpAddress::Ipv4(smoltcp::wire::Ipv4Address::UNSPECIFIED),
0,
));
Self { inner, local, peer }
Self {
inner,
local,
peer,
owns_port,
}
}
pub fn with_mut<R, F: FnMut(&mut smoltcp::socket::tcp::Socket<'static>) -> R>(
@ -683,6 +692,10 @@ impl Established {
self.inner.handle()
}
pub fn owns_port(&self) -> bool {
self.owns_port
}
pub fn close(&self) {
self.inner
.with_mut::<smoltcp::socket::tcp::Socket, _, _>(|socket| socket.close());

View File

@ -1,5 +1,6 @@
use crate::net::socket::common::ShutdownBit;
use crate::net::socket::inet::InetSocket;
use crate::net::socket::inet::Types;
use alloc::sync::Arc;
use system_error::SystemError;
@ -457,6 +458,9 @@ impl TcpSocket {
let iface = conn.iface().clone();
let me: alloc::sync::Weak<dyn InetSocket> = self.self_ref.clone();
conn.close();
if conn.owns_port() {
iface.port_manager().unbind_port(Types::Tcp, local_port);
}
iface.common().defer_tcp_close(handle, local_port, me);
writer.replace(inner::Inner::Established(conn));
}
@ -481,6 +485,9 @@ impl TcpSocket {
} else {
es.close();
}
if es.owns_port() {
iface.port_manager().unbind_port(Types::Tcp, local_port);
}
iface.common().defer_tcp_close(handle, local_port, me);
writer.replace(inner::Inner::Established(es));
}
@ -490,7 +497,10 @@ impl TcpSocket {
smoltcp::wire::IpAddress::Ipv6(_) => smoltcp::wire::IpVersion::Ipv6,
_ => smoltcp::wire::IpVersion::Ipv4,
};
let port = sc.get_name().port;
let iface = sc.iface().clone();
sc.release();
iface.port_manager().unbind_port(Types::Tcp, port);
writer.replace(inner::Inner::Closed(inner::Closed::new(ver)));
}
inner::Inner::Listening(mut ls) => {

View File

@ -20,6 +20,7 @@ pub enum IpOption {
MULTICAST_LOOP = 34,
ADD_MEMBERSHIP = 35,
DROP_MEMBERSHIP = 36,
MULTICAST_ALL = 49,
RECVERR_RFC4884 = 26,
}

View File

@ -13,9 +13,7 @@ use alloc::sync::Weak;
use alloc::vec::Vec;
use crate::libs::mutex::Mutex;
use crate::net::socket::inet::common::PortManager;
use crate::net::socket::inet::InetSocket;
use crate::net::socket::inet::Types;
#[derive(Debug, Clone)]
struct ClosingTcpSocket {
@ -62,11 +60,7 @@ impl TcpCloseDefer {
///
/// 重要:
/// - 锁顺序必须保持为:`SocketSet` -> `TcpCloseDefer::closing`,避免与 close 路径反转。
pub fn reap_closed(
&self,
sockets: &mut smoltcp::iface::SocketSet<'static>,
port_manager: &PortManager,
) {
pub fn reap_closed(&self, sockets: &mut smoltcp::iface::SocketSet<'static>) {
let mut closing = self.closing.lock();
if closing.is_empty() {
return;
@ -75,7 +69,7 @@ impl TcpCloseDefer {
while i < closing.len() {
let ClosingTcpSocket {
handle,
local_port,
local_port: _local_port,
ref sock,
} = closing[i];
let state = sockets.get::<smoltcp::socket::tcp::Socket>(handle).state();
@ -88,7 +82,6 @@ impl TcpCloseDefer {
continue;
}
sockets.remove(handle);
port_manager.unbind_port(Types::Tcp, local_port);
closing.swap_remove(i);
continue;
}

View File

@ -23,6 +23,7 @@ use alloc::boxed::Box;
use alloc::collections::BTreeMap;
use alloc::string::{String, ToString};
use alloc::sync::{Arc, Weak};
use core::sync::atomic::AtomicU32;
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use hashbrown::HashMap;
use system_error::SystemError;
@ -90,6 +91,8 @@ pub struct NetNamespace {
/// AF_UNIX abstract namespace table (scoped to this netns).
unix_abstract_table: Arc<UnixAbstractTable>,
/// Per-netns IPv4 ephemeral port range (ip_local_port_range)
local_port_range: AtomicU32,
}
#[derive(Debug)]
@ -136,6 +139,9 @@ impl NetNamespace {
netlink_socket_table: NetlinkSocketTable::default(),
netlink_kernel_socket: RwLock::new(generate_supported_netlink_kernel_sockets()),
unix_abstract_table: unix_abstract_table.clone(),
local_port_range: AtomicU32::new(
crate::net::socket::inet::common::port::DEFAULT_LOCAL_PORT_RANGE,
),
});
// Self::create_polling_thread(netns.clone(), "netns_root".to_string());
@ -168,6 +174,9 @@ impl NetNamespace {
netlink_socket_table: NetlinkSocketTable::default(),
netlink_kernel_socket: RwLock::new(generate_supported_netlink_kernel_sockets()),
unix_abstract_table: unix_abstract_table.clone(),
local_port_range: AtomicU32::new(
crate::net::socket::inet::common::port::DEFAULT_LOCAL_PORT_RANGE,
),
});
// Linux 语义:每个 netns 都需要一个可被唤醒的轮询线程来推进协议栈。
@ -199,6 +208,32 @@ impl NetNamespace {
self.device_list.read()
}
#[inline]
pub fn local_port_range(&self) -> (u16, u16) {
let value = self.local_port_range.load(Ordering::Relaxed);
((value >> 16) as u16, (value & 0xffff) as u16)
}
pub fn set_local_port_range(&self, min: u16, max: u16) -> Result<(), SystemError> {
if min == 0 || max == 0 || min > max {
return Err(SystemError::EINVAL);
}
let new_value = ((min as u32) << 16) | (max as u32);
loop {
let old_value = self.local_port_range.load(Ordering::Relaxed);
if old_value == new_value {
return Ok(());
}
if self
.local_port_range
.compare_exchange(old_value, new_value, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
return Ok(());
}
}
}
pub fn inner(&self) -> RwLockReadGuard<'_, InnerNetNamespace> {
self.inner.read()
}

View File

@ -583,7 +583,7 @@ impl Rseq {
// 更新 cpu_id 等字段
let cpu_id = current_cpu_id().data() as u32;
if let Err(e) = unsafe { access.update_cpu_node_id(cpu_id, 0, 0) } {
if let Err(_e) = unsafe { access.update_cpu_node_id(cpu_id, 0, 0) } {
// log::debug!("rseq update_cpu_node_id failed: {:?}", e);
Self::disable_current_rseq_after_fault(&pcb);
let _ = crate::ipc::signal::send_kernel_signal_to_current(

View File

@ -0,0 +1,127 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
static const char *kRangeFile = "/proc/sys/net/ipv4/ip_local_port_range";
static int read_range(int *min, int *max) {
int fd = open(kRangeFile, O_RDONLY);
if (fd < 0) {
perror("open range file");
return -1;
}
char buf[64];
ssize_t n = read(fd, buf, sizeof(buf) - 1);
close(fd);
if (n <= 0) {
perror("read range file");
return -1;
}
buf[n] = '\0';
if (sscanf(buf, "%d %d", min, max) != 2) {
fprintf(stderr, "failed to parse range: '%s'\n", buf);
return -1;
}
return 0;
}
static int write_range(int min, int max) {
int fd = open(kRangeFile, O_WRONLY | O_TRUNC);
if (fd < 0) {
perror("open range file for write");
return -1;
}
char buf[64];
int len = snprintf(buf, sizeof(buf), "%d %d", min, max);
if (len <= 0) {
close(fd);
return -1;
}
ssize_t n = write(fd, buf, (size_t)len);
close(fd);
if (n != len) {
perror("write range file");
return -1;
}
return 0;
}
int main(void) {
int parent_min = 0, parent_max = 0;
if (read_range(&parent_min, &parent_max) != 0) {
return 1;
}
if (access(kRangeFile, W_OK) != 0) {
printf("[SKIP] %s not writable\n", kRangeFile);
return 0;
}
pid_t pid = fork();
if (pid < 0) {
perror("fork");
return 1;
}
if (pid == 0) {
if (unshare(CLONE_NEWNET) != 0) {
printf("[SKIP] unshare(CLONE_NEWNET) failed: %s\n", strerror(errno));
return 0;
}
int child_min = 0, child_max = 0;
if (read_range(&child_min, &child_max) != 0) {
return 1;
}
int new_min = child_min;
int new_max = child_min + 10;
if (write_range(new_min, new_max) != 0) {
return 1;
}
int verify_min = 0, verify_max = 0;
if (read_range(&verify_min, &verify_max) != 0) {
return 1;
}
if (verify_min != new_min || verify_max != new_max) {
fprintf(stderr, "child range verify failed: %d %d (expected %d %d)\n",
verify_min, verify_max, new_min, new_max);
return 1;
}
// Restore original range in this netns to keep it tidy.
if (write_range(child_min, child_max) != 0) {
return 1;
}
return 0;
}
int status = 0;
if (waitpid(pid, &status, 0) < 0) {
perror("waitpid");
return 1;
}
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
fprintf(stderr, "child failed\n");
return 1;
}
int parent_min_after = 0, parent_max_after = 0;
if (read_range(&parent_min_after, &parent_max_after) != 0) {
return 1;
}
if (parent_min_after != parent_min || parent_max_after != parent_max) {
fprintf(stderr,
"parent range changed: %d %d (expected %d %d)\n",
parent_min_after, parent_max_after, parent_min, parent_max);
return 1;
}
printf("[PASS] ip_local_port_range is isolated per netns\n");
return 0;
}

View File

@ -197,6 +197,7 @@ dependencies = [
"chrono",
"clap",
"env_logger",
"libc",
"log",
"regex",
]

View File

@ -14,6 +14,7 @@ regex = "1.0"
chrono = { version = "0.4", features = ["serde"] }
log = "0.4"
env_logger = "0.10"
libc = "0.2"
[profile.release]
lto = true

View File

@ -4,6 +4,7 @@ use std::{
collections::HashSet,
fs::{self, File},
io::{BufRead, BufReader, Write},
os::unix::process::CommandExt,
path::{Path, PathBuf},
process::Command,
sync::{
@ -369,6 +370,19 @@ impl TestRunner {
if !blocked_subtests.is_empty() {
cmd.arg(format!("--gtest_filter=-{}", blocked_subtests.join(":")));
}
// Run each test binary in a fresh network namespace to avoid sysctl leakage.
let name_lc = test_name.to_ascii_lowercase();
if name_lc.contains("socket") || name_lc.contains("net") {
unsafe {
cmd.pre_exec(|| {
let ret = libc::unshare(libc::CLONE_NEWNET);
if ret != 0 {
return Err(std::io::Error::last_os_error());
}
Ok(())
});
}
}
let status = cmd
.current_dir(&self.config.tests_dir)

View File

@ -105,6 +105,8 @@ socket_ip_tcp_loopback_non_blocking_test
socket_ip_tcp_loopback_test
socket_ip_tcp_udp_generic_loopback_test
socket_ipv4_datagram_based_socket_unbound_loopback_test
socket_ipv4_udp_unbound_loopback_test
socket_ipv4_udp_unbound_loopback_nogotsan_test
# 信号处理测试
sigaction_test