From f36191f4080bd43becfc005dfbd865bf01f4e6e4 Mon Sep 17 00:00:00 2001 From: LoGin Date: Wed, 28 Jan 2026 22:01:34 +0800 Subject: [PATCH] feat(apic): add calibration for APIC timer initial count (#1715) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(apic): add calibration for APIC timer initial count Signed-off-by: longjin * fix(time): 修复next_n_us_timer_jiffies函数计算错误 - 修正了微秒转jiffies时未向上取整导致的精度问题 Signed-off-by: longjin * fix(net): defer tcp shutdown FIN until tx queue drains Signed-off-by: longjin * fix(poll): use protected user buffers and always write revents Signed-off-by: longjin --------- Co-authored-by: jiangyuming Signed-off-by: longjin --- .../src/arch/x86_64/driver/apic/apic_timer.rs | 92 ++++++++++++++++++- kernel/src/arch/x86_64/driver/apic/mod.rs | 17 ++++ kernel/src/filesystem/poll.rs | 71 +++++++++++++- kernel/src/filesystem/vfs/syscall/sys_poll.rs | 12 ++- .../src/filesystem/vfs/syscall/sys_ppoll.rs | 12 ++- kernel/src/net/socket/inet/stream/events.rs | 15 +++ .../src/net/socket/inet/stream/lifecycle.rs | 9 +- kernel/src/time/timer.rs | 5 +- 8 files changed, 218 insertions(+), 15 deletions(-) diff --git a/kernel/src/arch/x86_64/driver/apic/apic_timer.rs b/kernel/src/arch/x86_64/driver/apic/apic_timer.rs index 1a18e5436..6ca043581 100644 --- a/kernel/src/arch/x86_64/driver/apic/apic_timer.rs +++ b/kernel/src/arch/x86_64/driver/apic/apic_timer.rs @@ -1,9 +1,15 @@ use core::cell::RefCell; +use core::hint::spin_loop; use core::sync::atomic::{fence, Ordering}; -use crate::arch::driver::tsc::TSCManager; +use crate::arch::driver::{ + hpet::{hpet_instance, is_hpet_enabled}, + tsc::TSCManager, +}; use crate::arch::interrupt::TrapFrame; +use crate::driver::acpi::pmtmr::{acpi_pm_read_early, ACPI_PM_MASK, PMTMR_TICKS_PER_SEC}; use crate::driver::base::device::DeviceId; +use crate::driver::clocksource::acpi_pm::PMTMR_IO_PORT; use crate::exception::irqdata::{IrqHandlerData, IrqLineStatus}; use crate::exception::irqdesc::{ irq_desc_manager, IrqDesc, IrqFlowHandler, IrqHandleFlags, IrqHandler, IrqReturn, @@ -29,6 +35,7 @@ use super::xapic::XApicOffset; use super::{CurrentApic, LVTRegister, LocalAPIC, LVT}; pub const APIC_TIMER_IRQ_NUM: IrqNumber = IrqNumber::new(151); +const APIC_CALIBRATE_MS: u64 = 10; static mut LOCAL_APIC_TIMERS: [RefCell; PerCpu::MAX_CPU_NUM as usize] = [const { RefCell::new(LocalApicTimer::new()) }; PerCpu::MAX_CPU_NUM as usize]; @@ -108,9 +115,12 @@ fn init_bsp_apic_timer() { debug!("init_bsp_apic_timer"); assert!(smp_get_processor_id().data() == 0); let mut local_apic_timer = local_apic_timer_instance_mut(ProcessorId::new(0)); + let initial_count = local_apic_timer + .calibrate_initial_count() + .unwrap_or_else(LocalApicTimer::periodic_default_initial_count); local_apic_timer.init( LocalApicTimerMode::Periodic, - LocalApicTimer::periodic_default_initial_count(), + initial_count, LocalApicTimer::DIVISOR as u32, ); debug!("init_bsp_apic_timer done"); @@ -122,9 +132,12 @@ fn init_ap_apic_timer() { assert!(cpu_id.data() != 0); let mut local_apic_timer = local_apic_timer_instance_mut(cpu_id); + let initial_count = local_apic_timer + .calibrate_initial_count() + .unwrap_or_else(LocalApicTimer::periodic_default_initial_count); local_apic_timer.init( LocalApicTimerMode::Periodic, - LocalApicTimer::periodic_default_initial_count(), + initial_count, LocalApicTimer::DIVISOR as u32, ); debug!("init_ap_apic_timer done"); @@ -208,6 +221,57 @@ impl LocalApicTimer { return count; } + fn calibrate_initial_count(&mut self) -> Option { + self.stop_current(); + self.set_divisor(Self::DIVISOR as u32); + self.set_initial_cnt(u32::MAX as u64); + self.set_lvt_masked(LocalApicTimerMode::Periodic); + self.start_current(); + + if is_hpet_enabled() { + let period_fs = hpet_instance().period(); + if period_fs == 0 { + return None; + } + let start = hpet_instance().main_counter_value(); + let target_ticks = (APIC_CALIBRATE_MS * 1_000_000u64 * 1_000_000u64) / period_fs; + while (hpet_instance().main_counter_value() - start) < target_ticks { + spin_loop(); + } + } else if PMTMR_IO_PORT.load(Ordering::SeqCst) != 0 { + let start = acpi_pm_read_early() as u64; + let target_ticks = PMTMR_TICKS_PER_SEC * APIC_CALIBRATE_MS / 1000; + while ((acpi_pm_read_early() as u64).wrapping_sub(start) & ACPI_PM_MASK) < target_ticks + { + spin_loop(); + } + } else { + let start = TSCManager::cpu_khz(); + if start == 0 { + return None; + } + let tsc_start = unsafe { x86::time::rdtsc() }; + let mut tsc_now = tsc_start; + let target = start * APIC_CALIBRATE_MS; + while (tsc_now - tsc_start) < target { + tsc_now = unsafe { x86::time::rdtsc() }; + } + } + + let cur = CurrentApic.read_timer_current_count() as u64; + self.stop_current(); + + let elapsed = (u32::MAX as u64).saturating_sub(cur); + if elapsed == 0 { + return None; + } + + let apic_khz = elapsed / APIC_CALIBRATE_MS; + // apic_khz already accounts for the programmed divider. + let initial_count = apic_khz * Self::INTERVAL_MS; + Some(initial_count) + } + /// Init this manager. /// /// At this time, it does nothing. @@ -266,6 +330,10 @@ impl LocalApicTimer { CurrentApic.set_lvt(lvt); } + fn set_lvt_masked(&mut self, mode: LocalApicTimerMode) { + self.setup_lvt(APIC_TIMER_IRQ_NUM.data() as u8, true, mode); + } + /// 检查是否支持TSC-Deadline /// /// 此函数调用cpuid,请避免多次调用此函数。 @@ -306,11 +374,25 @@ impl TryFrom for LocalApicTimerMode { impl CurrentApic { fn set_timer_divisor(&self, divisor: u32) { + let div_conf = match divisor { + 1 => 0b1011, + 2 => 0b0000, + 4 => 0b0001, + 8 => 0b0010, + 16 => 0b0011, + 32 => 0b1000, + 64 => 0b1001, + 128 => 0b1010, + _ => 0b1011, + }; if self.x2apic_enabled() { - unsafe { wrmsr(IA32_X2APIC_DIV_CONF, divisor.into()) }; + unsafe { wrmsr(IA32_X2APIC_DIV_CONF, div_conf.into()) }; } else { unsafe { - self.write_xapic_register(XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CLKDIV, divisor) + self.write_xapic_register( + XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CLKDIV, + div_conf, + ) }; } } diff --git a/kernel/src/arch/x86_64/driver/apic/mod.rs b/kernel/src/arch/x86_64/driver/apic/mod.rs index 1a20ac6e1..85938b499 100644 --- a/kernel/src/arch/x86_64/driver/apic/mod.rs +++ b/kernel/src/arch/x86_64/driver/apic/mod.rs @@ -480,6 +480,23 @@ impl CurrentApic { } } + pub(self) unsafe fn read_xapic_register(&self, reg: XApicOffset) -> u32 { + if let Some(xapic) = current_xapic_instance().borrow_mut().as_mut() { + return xapic.read(reg); + } + 0 + } + + pub(super) fn read_timer_current_count(&self) -> u32 { + let reg = XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CURRENT_COUNT_REG as u32; + if self.x2apic_enabled() { + return unsafe { rdmsr(APIC_BASE_MSR + (reg >> 4)) as u32 }; + } + unsafe { + self.read_xapic_register(XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CURRENT_COUNT_REG) + } + } + /// 屏蔽类8259A芯片 unsafe fn mask8259a(&self) { CurrentPortIOArch::out8(0x21, 0xff); diff --git a/kernel/src/filesystem/poll.rs b/kernel/src/filesystem/poll.rs index e18714eef..d1145f1e5 100644 --- a/kernel/src/filesystem/poll.rs +++ b/kernel/src/filesystem/poll.rs @@ -6,6 +6,7 @@ use crate::{ libs::wait_queue::{TimeoutWaker, Waiter}, process::ProcessManager, syscall::user_access::UserBufferWriter, + syscall::user_buffer::UserBuffer, time::{ syscall::PosixTimeval, timer::{next_n_us_timer_jiffies, Timer}, @@ -15,16 +16,77 @@ use crate::{ use super::vfs::file::{File, FileFlags}; use alloc::sync::Arc; +use alloc::vec::Vec; use system_error::SystemError; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct PollFd { pub fd: c_int, pub events: u16, pub revents: u16, } +#[inline] +fn pollfd_revents_offset() -> usize { + let base = core::ptr::null::(); + unsafe { core::ptr::addr_of!((*base).revents) as usize - base as usize } +} + +pub(crate) fn read_pollfds_from_user( + user_buf: &mut UserBuffer<'_>, + nfds: usize, +) -> Result, SystemError> { + let elem_size = core::mem::size_of::(); + let total_len = nfds.checked_mul(elem_size).ok_or(SystemError::EINVAL)?; + if user_buf.len() < total_len { + return Err(SystemError::EFAULT); + } + + let mut poll_fds = vec![ + PollFd { + fd: 0, + events: 0, + revents: 0 + }; + nfds + ]; + let dst_bytes = + unsafe { core::slice::from_raw_parts_mut(poll_fds.as_mut_ptr() as *mut u8, total_len) }; + let copied = user_buf.read_from_user(0, dst_bytes)?; + if copied != total_len { + return Err(SystemError::EFAULT); + } + Ok(poll_fds) +} + +pub(crate) fn write_pollfds_revents_to_user( + user_buf: &mut UserBuffer<'_>, + poll_fds: &[PollFd], +) -> Result<(), SystemError> { + let elem_size = core::mem::size_of::(); + let total_len = poll_fds + .len() + .checked_mul(elem_size) + .ok_or(SystemError::EINVAL)?; + if user_buf.len() < total_len { + return Err(SystemError::EFAULT); + } + let revents_off = pollfd_revents_offset(); + for (i, pollfd) in poll_fds.iter().enumerate() { + let off = i + .checked_mul(elem_size) + .and_then(|v| v.checked_add(revents_off)) + .ok_or(SystemError::EINVAL)?; + let bytes = pollfd.revents.to_ne_bytes(); + let written = user_buf.write_to_user(off, &bytes)?; + if written != bytes.len() { + return Err(SystemError::EFAULT); + } + } + Ok(()) +} + struct PollAdapter<'a> { ep_file: Arc, poll_fds: &'a mut [PollFd], @@ -492,7 +554,12 @@ impl RestartFn for RestartFnPoll { let mut poll_fds_writer = UserBufferWriter::new(d.pollfd_ptr.as_ptr::(), len, true)?; - let mut r = do_sys_poll(poll_fds_writer.buffer(0)?, d.timeout_instant); + let mut user_buf = poll_fds_writer.buffer_protected(0)?; + let mut poll_fds = read_pollfds_from_user(&mut user_buf, d.nfds as usize)?; + let mut r = do_sys_poll(&mut poll_fds, d.timeout_instant); + if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) { + r = Err(e); + } if let Err(SystemError::ERESTARTNOHAND) = r { let restart_block = RestartBlock::new(&RestartFnPoll, data.clone()); r = ProcessManager::current_pcb().set_restart_fn(Some(restart_block)); diff --git a/kernel/src/filesystem/vfs/syscall/sys_poll.rs b/kernel/src/filesystem/vfs/syscall/sys_poll.rs index 5820d26f5..8dbd35bff 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_poll.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_poll.rs @@ -1,6 +1,9 @@ use crate::arch::interrupt::TrapFrame; use crate::arch::syscall::nr::SYS_POLL; -use crate::filesystem::poll::{do_sys_poll, poll_select_set_timeout, PollFd, RestartFnPoll}; +use crate::filesystem::poll::{ + do_sys_poll, poll_select_set_timeout, read_pollfds_from_user, write_pollfds_revents_to_user, + PollFd, RestartFnPoll, +}; use crate::ipc::signal::{RestartBlock, RestartBlockData}; use crate::mm::VirtAddr; use crate::process::resource::RLimitID; @@ -133,7 +136,12 @@ pub fn do_poll(pollfd_ptr: usize, nfds: u32, timeout_ms: i32) -> Result(), len, true)?; - let mut r = do_sys_poll(poll_fds_writer.buffer(0)?, timeout); + let mut user_buf = poll_fds_writer.buffer_protected(0)?; + let mut poll_fds = read_pollfds_from_user(&mut user_buf, nfds as usize)?; + let mut r = do_sys_poll(&mut poll_fds, timeout); + if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) { + r = Err(e); + } if let Err(SystemError::ERESTARTNOHAND) = r { let restart_block_data = RestartBlockData::new_poll(pollfd_ptr, nfds, timeout); let restart_block = RestartBlock::new(&RestartFnPoll, restart_block_data); diff --git a/kernel/src/filesystem/vfs/syscall/sys_ppoll.rs b/kernel/src/filesystem/vfs/syscall/sys_ppoll.rs index 10af53d00..c585897d6 100644 --- a/kernel/src/filesystem/vfs/syscall/sys_ppoll.rs +++ b/kernel/src/filesystem/vfs/syscall/sys_ppoll.rs @@ -2,7 +2,8 @@ use crate::arch::interrupt::TrapFrame; use crate::arch::ipc::signal::SigSet; use crate::arch::syscall::nr::SYS_PPOLL; use crate::filesystem::poll::{ - do_sys_poll, poll_select_finish, poll_select_set_timeout, PollFd, PollTimeType, + do_sys_poll, poll_select_finish, poll_select_set_timeout, read_pollfds_from_user, + write_pollfds_revents_to_user, PollFd, PollTimeType, }; use crate::ipc::signal::set_user_sigmask; use crate::mm::VirtAddr; @@ -190,8 +191,13 @@ pub fn do_ppoll( use crate::syscall::user_access::UserBufferWriter; let mut poll_fds_writer = UserBufferWriter::new(pollfd_ptr.as_ptr::(), pollfds_len, true)?; - let poll_fds = poll_fds_writer.buffer(0)?; - do_sys_poll(poll_fds, timeout_ts) + let mut user_buf = poll_fds_writer.buffer_protected(0)?; + let mut poll_fds = read_pollfds_from_user(&mut user_buf, nfds as usize)?; + let mut r = do_sys_poll(&mut poll_fds, timeout_ts); + if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) { + r = Err(e); + } + r }; return poll_select_finish(timeout_ts, timespec_ptr, PollTimeType::TimeSpec, r); diff --git a/kernel/src/net/socket/inet/stream/events.rs b/kernel/src/net/socket/inet/stream/events.rs index 7f3d1cfe4..ef358c190 100644 --- a/kernel/src/net/socket/inet/stream/events.rs +++ b/kernel/src/net/socket/inet/stream/events.rs @@ -44,6 +44,21 @@ impl TcpSocket { Some(inner::Inner::Established(established)) => { established.update_io_events(&self.pollee); + // If SHUT_WR was requested while there were pending TX bytes, send FIN once + // the TX queue drains to preserve Linux-like semantics. + if self.is_send_shutdown() + && self + .send_fin_deferred + .load(core::sync::atomic::Ordering::Relaxed) + { + let pending = established.with(|socket| socket.send_queue()); + if pending == 0 { + established.with_mut(|socket| socket.close()); + self.send_fin_deferred + .store(false, core::sync::atomic::Ordering::Relaxed); + } + } + // If SHUT_WR, set EPOLLOUT so send() wakes up and returns EPIPE. if self.is_send_shutdown() { self.pollee.fetch_or( diff --git a/kernel/src/net/socket/inet/stream/lifecycle.rs b/kernel/src/net/socket/inet/stream/lifecycle.rs index bcee25bee..74f6314cf 100644 --- a/kernel/src/net/socket/inet/stream/lifecycle.rs +++ b/kernel/src/net/socket/inet/stream/lifecycle.rs @@ -288,11 +288,16 @@ impl TcpSocket { } if how.contains(ShutdownBit::SHUT_WR) { - if self + let pending = established.with(|socket| socket.send_queue()); + if pending > 0 { + // Defer FIN until all queued data has been sent. + self.send_fin_deferred + .store(true, core::sync::atomic::Ordering::Relaxed); + } else if self .send_fin_deferred .load(core::sync::atomic::Ordering::Relaxed) { - // FIN will be sent once cork-buffered bytes are flushed. + // FIN will be sent once deferred bytes are fully flushed. } else { established.with_mut(|socket| socket.close()); } diff --git a/kernel/src/time/timer.rs b/kernel/src/time/timer.rs index 42064ae7d..1ad659ce2 100644 --- a/kernel/src/time/timer.rs +++ b/kernel/src/time/timer.rs @@ -290,7 +290,10 @@ pub fn next_n_ms_timer_jiffies(expire_ms: u64) -> u64 { } /// 计算接下来n微秒对应的定时器时间片 pub fn next_n_us_timer_jiffies(expire_us: u64) -> u64 { - return TIMER_JIFFIES.load(Ordering::SeqCst) + expire_us * 1000 / NSEC_PER_JIFFY as u64; + let now = TIMER_JIFFIES.load(Ordering::SeqCst); + let ns = expire_us * 1000; + let jiffies = ns.div_ceil(NSEC_PER_JIFFY as u64); + now + jiffies } /// @brief 让pcb休眠timeout个jiffies