feat(apic): add calibration for APIC timer initial count (#1715)

* feat(apic): add calibration for APIC timer initial count

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(time): 修复next_n_us_timer_jiffies函数计算错误

- 修正了微秒转jiffies时未向上取整导致的精度问题

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(net): defer tcp shutdown FIN until tx queue drains

Signed-off-by: longjin <longjin@DragonOS.org>

* fix(poll): use protected user buffers and always write revents

Signed-off-by: longjin <longjin@DragonOS.org>

---------

Co-authored-by: jiangyuming <mingjiangyu1@qq.com>
Signed-off-by: longjin <longjin@DragonOS.org>
This commit is contained in:
LoGin 2026-01-28 22:01:34 +08:00 committed by GitHub
parent 832c04a23f
commit f36191f408
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 218 additions and 15 deletions

View File

@ -1,9 +1,15 @@
use core::cell::RefCell;
use core::hint::spin_loop;
use core::sync::atomic::{fence, Ordering};
use crate::arch::driver::tsc::TSCManager;
use crate::arch::driver::{
hpet::{hpet_instance, is_hpet_enabled},
tsc::TSCManager,
};
use crate::arch::interrupt::TrapFrame;
use crate::driver::acpi::pmtmr::{acpi_pm_read_early, ACPI_PM_MASK, PMTMR_TICKS_PER_SEC};
use crate::driver::base::device::DeviceId;
use crate::driver::clocksource::acpi_pm::PMTMR_IO_PORT;
use crate::exception::irqdata::{IrqHandlerData, IrqLineStatus};
use crate::exception::irqdesc::{
irq_desc_manager, IrqDesc, IrqFlowHandler, IrqHandleFlags, IrqHandler, IrqReturn,
@ -29,6 +35,7 @@ use super::xapic::XApicOffset;
use super::{CurrentApic, LVTRegister, LocalAPIC, LVT};
pub const APIC_TIMER_IRQ_NUM: IrqNumber = IrqNumber::new(151);
/// Length, in milliseconds, of the busy-wait window over which the local APIC timer is
/// measured against a reference clock during calibration.
const APIC_CALIBRATE_MS: u64 = 10;
static mut LOCAL_APIC_TIMERS: [RefCell<LocalApicTimer>; PerCpu::MAX_CPU_NUM as usize] =
[const { RefCell::new(LocalApicTimer::new()) }; PerCpu::MAX_CPU_NUM as usize];
@ -108,9 +115,12 @@ fn init_bsp_apic_timer() {
debug!("init_bsp_apic_timer");
assert!(smp_get_processor_id().data() == 0);
let mut local_apic_timer = local_apic_timer_instance_mut(ProcessorId::new(0));
let initial_count = local_apic_timer
.calibrate_initial_count()
.unwrap_or_else(LocalApicTimer::periodic_default_initial_count);
local_apic_timer.init(
LocalApicTimerMode::Periodic,
LocalApicTimer::periodic_default_initial_count(),
initial_count,
LocalApicTimer::DIVISOR as u32,
);
debug!("init_bsp_apic_timer done");
@ -122,9 +132,12 @@ fn init_ap_apic_timer() {
assert!(cpu_id.data() != 0);
let mut local_apic_timer = local_apic_timer_instance_mut(cpu_id);
let initial_count = local_apic_timer
.calibrate_initial_count()
.unwrap_or_else(LocalApicTimer::periodic_default_initial_count);
local_apic_timer.init(
LocalApicTimerMode::Periodic,
LocalApicTimer::periodic_default_initial_count(),
initial_count,
LocalApicTimer::DIVISOR as u32,
);
debug!("init_ap_apic_timer done");
@ -208,6 +221,57 @@ impl LocalApicTimer {
return count;
}
/// Measures how fast the local APIC timer counts and derives the initial count that
/// corresponds to one `Self::INTERVAL_MS` period.
///
/// The timer is loaded with `u32::MAX`, configured through `set_lvt_masked`, and started.
/// This CPU then busy-waits for `APIC_CALIBRATE_MS` milliseconds against the first
/// available reference clock, tried in this order:
///
/// 1. HPET main counter (when `is_hpet_enabled()`),
/// 2. ACPI PM timer (when `PMTMR_IO_PORT` is non-zero),
/// 3. TSC, using the frequency reported by `TSCManager::cpu_khz()`.
///
/// # Returns
///
/// `Some(initial_count)` on success. `None` when the chosen reference reports a zero
/// period/frequency, or when the measurement is too small to produce a non-zero initial
/// count. Callers are expected to fall back to a default in that case.
///
/// The timer is always stopped again before this function returns.
fn calibrate_initial_count(&mut self) -> Option<u64> {
    self.stop_current();
    self.set_divisor(Self::DIVISOR as u32);
    self.set_initial_cnt(u32::MAX as u64);
    self.set_lvt_masked(LocalApicTimerMode::Periodic);
    self.start_current();

    if is_hpet_enabled() {
        let period_fs = hpet_instance().period();
        if period_fs == 0 {
            // Do not leave the timer counting when calibration is abandoned.
            self.stop_current();
            return None;
        }
        let start = hpet_instance().main_counter_value();
        // ms * 10^6 * 10^6 expresses the window in the same unit as `period_fs`
        // (femtoseconds, judging by this scaling — TODO confirm against the HPET driver).
        let target_ticks = (APIC_CALIBRATE_MS * 1_000_000u64 * 1_000_000u64) / period_fs;
        // wrapping_sub: a counter roll-over must not trip an overflow panic.
        while hpet_instance().main_counter_value().wrapping_sub(start) < target_ticks {
            spin_loop();
        }
    } else if PMTMR_IO_PORT.load(Ordering::SeqCst) != 0 {
        let start = acpi_pm_read_early() as u64;
        let target_ticks = PMTMR_TICKS_PER_SEC * APIC_CALIBRATE_MS / 1000;
        // The PM timer is narrower than 64 bits; mask the wrapped difference.
        while ((acpi_pm_read_early() as u64).wrapping_sub(start) & ACPI_PM_MASK) < target_ticks
        {
            spin_loop();
        }
    } else {
        let cpu_khz = TSCManager::cpu_khz();
        if cpu_khz == 0 {
            // Do not leave the timer counting when calibration is abandoned.
            self.stop_current();
            return None;
        }
        // SAFETY: `rdtsc` only reads the time-stamp counter and has no memory effects.
        let tsc_start = unsafe { x86::time::rdtsc() };
        // kHz * ms = number of TSC cycles in the calibration window.
        let target = cpu_khz * APIC_CALIBRATE_MS;
        // SAFETY: same as above.
        while unsafe { x86::time::rdtsc() }.wrapping_sub(tsc_start) < target {
            spin_loop();
        }
    }

    let cur = CurrentApic.read_timer_current_count() as u64;
    self.stop_current();

    // The timer counts down from u32::MAX, so the distance travelled is the tick count.
    let elapsed = (u32::MAX as u64).saturating_sub(cur);
    let apic_khz = elapsed / APIC_CALIBRATE_MS;
    // apic_khz already accounts for the programmed divider.
    let initial_count = apic_khz * Self::INTERVAL_MS;

    // A zero initial count (no ticks seen, or fewer than APIC_CALIBRATE_MS of them)
    // is not a usable calibration result; report failure so the caller falls back.
    (initial_count != 0).then_some(initial_count)
}
/// Init this manager.
///
/// At this time, it does nothing.
@ -266,6 +330,10 @@ impl LocalApicTimer {
CurrentApic.set_lvt(lvt);
}
/// Configures the timer LVT entry for `mode` using the `APIC_TIMER_IRQ_NUM` vector,
/// passing `true` as the second argument of `setup_lvt` (presumably the mask flag,
/// going by this method's name — confirm against `setup_lvt`).
fn set_lvt_masked(&mut self, mode: LocalApicTimerMode) {
    let vector = APIC_TIMER_IRQ_NUM.data() as u8;
    let masked = true;
    self.setup_lvt(vector, masked, mode);
}
/// 检查是否支持TSC-Deadline
///
/// 此函数调用cpuid请避免多次调用此函数。
@ -306,11 +374,25 @@ impl TryFrom<u8> for LocalApicTimerMode {
impl CurrentApic {
/// Programs the timer clock divider for the requested `divisor`.
///
/// `divisor` is the plain division factor (1, 2, 4, 8, 16, 32, 64 or 128). It is
/// translated by the `match` below into the 4-bit divide-configuration value; any
/// other factor gets the same value as a factor of 1.
fn set_timer_divisor(&self, divisor: u32) {
// Map the division factor to its divide-configuration bit pattern.
let div_conf = match divisor {
1 => 0b1011,
2 => 0b0000,
4 => 0b0001,
8 => 0b0010,
16 => 0b0011,
32 => 0b1000,
64 => 0b1001,
128 => 0b1010,
// Unrecognized factors use the same pattern as a factor of 1.
_ => 0b1011,
};
if self.x2apic_enabled() {
// x2APIC mode: the register is written through an MSR.
// NOTE(review): the two `wrmsr` lines below look like the pre-change (raw `divisor`)
// and post-change (`div_conf`) versions of one statement — confirm only one is meant.
unsafe { wrmsr(IA32_X2APIC_DIV_CONF, divisor.into()) };
unsafe { wrmsr(IA32_X2APIC_DIV_CONF, div_conf.into()) };
} else {
// xAPIC mode: the register is written through `write_xapic_register`.
// NOTE(review): as above, the single-line call with `divisor` and the multi-line call
// with `div_conf` look like old/new versions of the same statement.
unsafe {
self.write_xapic_register(XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CLKDIV, divisor)
self.write_xapic_register(
XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CLKDIV,
div_conf,
)
};
}
}

View File

@ -480,6 +480,23 @@ impl CurrentApic {
}
}
/// Reads the xAPIC register `reg` through the current xAPIC instance.
///
/// Returns `0` when no xAPIC instance is available.
///
/// # Safety
///
/// The caller takes responsibility for the register access performed by the
/// underlying `read` on the xAPIC instance.
pub(self) unsafe fn read_xapic_register(&self, reg: XApicOffset) -> u32 {
    let mut instance = current_xapic_instance().borrow_mut();
    let value = match instance.as_mut() {
        Some(xapic) => xapic.read(reg),
        None => 0,
    };
    value
}
/// Returns the value of the local APIC timer's current-count register.
///
/// When x2APIC is enabled the register is read from the MSR at
/// `APIC_BASE_MSR + (offset >> 4)`; otherwise it is read through
/// `read_xapic_register`.
pub(super) fn read_timer_current_count(&self) -> u32 {
    if !self.x2apic_enabled() {
        return unsafe {
            self.read_xapic_register(
                XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CURRENT_COUNT_REG,
            )
        };
    }

    // Derive the MSR index from the register offset.
    let mmio_offset = XApicOffset::LOCAL_APIC_OFFSET_Local_APIC_CURRENT_COUNT_REG as u32;
    let msr = APIC_BASE_MSR + (mmio_offset >> 4);
    unsafe { rdmsr(msr) as u32 }
}
/// 屏蔽类8259A芯片
unsafe fn mask8259a(&self) {
CurrentPortIOArch::out8(0x21, 0xff);

View File

@ -6,6 +6,7 @@ use crate::{
libs::wait_queue::{TimeoutWaker, Waiter},
process::ProcessManager,
syscall::user_access::UserBufferWriter,
syscall::user_buffer::UserBuffer,
time::{
syscall::PosixTimeval,
timer::{next_n_us_timer_jiffies, Timer},
@ -15,16 +16,77 @@ use crate::{
use super::vfs::file::{File, FileFlags};
use alloc::sync::Arc;
use alloc::vec::Vec;
use system_error::SystemError;
/// One poll record as exchanged with user space.
///
/// `#[repr(C)]` gives the struct C field order and layout; `read_pollfds_from_user`
/// fills values of this type from raw user bytes and `write_pollfds_revents_to_user`
/// writes `revents` back at its field offset.
// NOTE(review): the two `derive` attributes below look like the pre- and post-change
// versions of one line — confirm only the second is meant to remain.
#[repr(C)]
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub struct PollFd {
/// Descriptor this record refers to (presumably a file descriptor — confirm with callers).
pub fd: c_int,
/// Event bits supplied with the record (presumably the requested events — confirm).
pub events: u16,
/// Result bits; this is the only field copied back to user space.
pub revents: u16,
}
/// Returns the byte offset of the `revents` field inside a `PollFd`.
///
/// The offset is computed by the compiler through `offset_of!`, so no pointer is
/// created, offset, or dereferenced. This avoids projecting a field through a null
/// pointer, which is undefined behavior for a field at a non-zero offset.
#[inline]
fn pollfd_revents_offset() -> usize {
    core::mem::offset_of!(PollFd, revents)
}
/// Copies `nfds` `PollFd` records from the start of `user_buf` into a kernel-side vector.
///
/// # Errors
///
/// - `EINVAL` if `nfds * size_of::<PollFd>()` overflows `usize`.
/// - `EFAULT` if `user_buf` is shorter than that byte count, or if fewer bytes than
///   requested were copied.
/// - Any error returned by `read_from_user`.
pub(crate) fn read_pollfds_from_user(
    user_buf: &mut UserBuffer<'_>,
    nfds: usize,
) -> Result<Vec<PollFd>, SystemError> {
    let byte_len = match nfds.checked_mul(core::mem::size_of::<PollFd>()) {
        Some(len) => len,
        None => return Err(SystemError::EINVAL),
    };
    if byte_len > user_buf.len() {
        return Err(SystemError::EFAULT);
    }

    // Zero-initialized destination records, overwritten below by the user copy.
    const EMPTY: PollFd = PollFd {
        fd: 0,
        events: 0,
        revents: 0,
    };
    let mut entries = vec![EMPTY; nfds];

    // SAFETY: `entries` owns exactly `nfds` initialized `PollFd` values, i.e. `byte_len`
    // bytes, and the byte view is only used while `entries` is alive and not otherwise
    // accessed.
    let raw =
        unsafe { core::slice::from_raw_parts_mut(entries.as_mut_ptr().cast::<u8>(), byte_len) };

    match user_buf.read_from_user(0, raw)? {
        n if n == byte_len => Ok(entries),
        _ => Err(SystemError::EFAULT),
    }
}
/// Writes the `revents` field of every entry in `poll_fds` back into `user_buf`.
///
/// Only the `revents` bytes of each record are written, at
/// `index * size_of::<PollFd>() + pollfd_revents_offset()`; the other fields in the
/// user buffer are left untouched.
///
/// # Errors
///
/// - `EINVAL` if a size or offset computation overflows `usize`.
/// - `EFAULT` if `user_buf` is too short for `poll_fds.len()` records, or if a write
///   stores fewer bytes than requested.
/// - Any error returned by `write_to_user`.
pub(crate) fn write_pollfds_revents_to_user(
    user_buf: &mut UserBuffer<'_>,
    poll_fds: &[PollFd],
) -> Result<(), SystemError> {
    let stride = core::mem::size_of::<PollFd>();
    let Some(needed) = poll_fds.len().checked_mul(stride) else {
        return Err(SystemError::EINVAL);
    };
    if needed > user_buf.len() {
        return Err(SystemError::EFAULT);
    }

    let field_off = pollfd_revents_offset();
    for (index, entry) in poll_fds.iter().enumerate() {
        let dst = match index
            .checked_mul(stride)
            .and_then(|base| base.checked_add(field_off))
        {
            Some(dst) => dst,
            None => return Err(SystemError::EINVAL),
        };
        let payload = entry.revents.to_ne_bytes();
        if user_buf.write_to_user(dst, &payload)? != payload.len() {
            return Err(SystemError::EFAULT);
        }
    }
    Ok(())
}
struct PollAdapter<'a> {
ep_file: Arc<File>,
poll_fds: &'a mut [PollFd],
@ -492,7 +554,12 @@ impl RestartFn for RestartFnPoll {
let mut poll_fds_writer =
UserBufferWriter::new(d.pollfd_ptr.as_ptr::<PollFd>(), len, true)?;
let mut r = do_sys_poll(poll_fds_writer.buffer(0)?, d.timeout_instant);
let mut user_buf = poll_fds_writer.buffer_protected(0)?;
let mut poll_fds = read_pollfds_from_user(&mut user_buf, d.nfds as usize)?;
let mut r = do_sys_poll(&mut poll_fds, d.timeout_instant);
if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) {
r = Err(e);
}
if let Err(SystemError::ERESTARTNOHAND) = r {
let restart_block = RestartBlock::new(&RestartFnPoll, data.clone());
r = ProcessManager::current_pcb().set_restart_fn(Some(restart_block));

View File

@ -1,6 +1,9 @@
use crate::arch::interrupt::TrapFrame;
use crate::arch::syscall::nr::SYS_POLL;
use crate::filesystem::poll::{do_sys_poll, poll_select_set_timeout, PollFd, RestartFnPoll};
use crate::filesystem::poll::{
do_sys_poll, poll_select_set_timeout, read_pollfds_from_user, write_pollfds_revents_to_user,
PollFd, RestartFnPoll,
};
use crate::ipc::signal::{RestartBlock, RestartBlockData};
use crate::mm::VirtAddr;
use crate::process::resource::RLimitID;
@ -133,7 +136,12 @@ pub fn do_poll(pollfd_ptr: usize, nfds: u32, timeout_ms: i32) -> Result<usize, S
}
let mut poll_fds_writer = UserBufferWriter::new(pollfd_ptr.as_ptr::<PollFd>(), len, true)?;
let mut r = do_sys_poll(poll_fds_writer.buffer(0)?, timeout);
let mut user_buf = poll_fds_writer.buffer_protected(0)?;
let mut poll_fds = read_pollfds_from_user(&mut user_buf, nfds as usize)?;
let mut r = do_sys_poll(&mut poll_fds, timeout);
if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) {
r = Err(e);
}
if let Err(SystemError::ERESTARTNOHAND) = r {
let restart_block_data = RestartBlockData::new_poll(pollfd_ptr, nfds, timeout);
let restart_block = RestartBlock::new(&RestartFnPoll, restart_block_data);

View File

@ -2,7 +2,8 @@ use crate::arch::interrupt::TrapFrame;
use crate::arch::ipc::signal::SigSet;
use crate::arch::syscall::nr::SYS_PPOLL;
use crate::filesystem::poll::{
do_sys_poll, poll_select_finish, poll_select_set_timeout, PollFd, PollTimeType,
do_sys_poll, poll_select_finish, poll_select_set_timeout, read_pollfds_from_user,
write_pollfds_revents_to_user, PollFd, PollTimeType,
};
use crate::ipc::signal::set_user_sigmask;
use crate::mm::VirtAddr;
@ -190,8 +191,13 @@ pub fn do_ppoll(
use crate::syscall::user_access::UserBufferWriter;
let mut poll_fds_writer =
UserBufferWriter::new(pollfd_ptr.as_ptr::<PollFd>(), pollfds_len, true)?;
let poll_fds = poll_fds_writer.buffer(0)?;
do_sys_poll(poll_fds, timeout_ts)
let mut user_buf = poll_fds_writer.buffer_protected(0)?;
let mut poll_fds = read_pollfds_from_user(&mut user_buf, nfds as usize)?;
let mut r = do_sys_poll(&mut poll_fds, timeout_ts);
if let Err(e) = write_pollfds_revents_to_user(&mut user_buf, &poll_fds) {
r = Err(e);
}
r
};
return poll_select_finish(timeout_ts, timespec_ptr, PollTimeType::TimeSpec, r);

View File

@ -44,6 +44,21 @@ impl TcpSocket {
Some(inner::Inner::Established(established)) => {
established.update_io_events(&self.pollee);
// If SHUT_WR was requested while there were pending TX bytes, send FIN once
// the TX queue drains to preserve Linux-like semantics.
if self.is_send_shutdown()
&& self
.send_fin_deferred
.load(core::sync::atomic::Ordering::Relaxed)
{
let pending = established.with(|socket| socket.send_queue());
if pending == 0 {
established.with_mut(|socket| socket.close());
self.send_fin_deferred
.store(false, core::sync::atomic::Ordering::Relaxed);
}
}
// If SHUT_WR, set EPOLLOUT so send() wakes up and returns EPIPE.
if self.is_send_shutdown() {
self.pollee.fetch_or(

View File

@ -288,11 +288,16 @@ impl TcpSocket {
}
if how.contains(ShutdownBit::SHUT_WR) {
if self
let pending = established.with(|socket| socket.send_queue());
if pending > 0 {
// Defer FIN until all queued data has been sent.
self.send_fin_deferred
.store(true, core::sync::atomic::Ordering::Relaxed);
} else if self
.send_fin_deferred
.load(core::sync::atomic::Ordering::Relaxed)
{
// FIN will be sent once cork-buffered bytes are flushed.
// FIN will be sent once deferred bytes are fully flushed.
} else {
established.with_mut(|socket| socket.close());
}

View File

@ -290,7 +290,10 @@ pub fn next_n_ms_timer_jiffies(expire_ms: u64) -> u64 {
}
/// 计算接下来n微秒对应的定时器时间片
pub fn next_n_us_timer_jiffies(expire_us: u64) -> u64 {
return TIMER_JIFFIES.load(Ordering::SeqCst) + expire_us * 1000 / NSEC_PER_JIFFY as u64;
let now = TIMER_JIFFIES.load(Ordering::SeqCst);
let ns = expire_us * 1000;
let jiffies = ns.div_ceil(NSEC_PER_JIFFY as u64);
now + jiffies
}
/// @brief 让pcb休眠timeout个jiffies