diff --git a/kernel/src/filesystem/ext4/inode.rs b/kernel/src/filesystem/ext4/inode.rs index d3b69834c..57b66856c 100644 --- a/kernel/src/filesystem/ext4/inode.rs +++ b/kernel/src/filesystem/ext4/inode.rs @@ -1,6 +1,6 @@ use crate::{ filesystem::{ - page_cache::PageCache, + page_cache::{PageCache, SyncPageCacheBackend}, vfs::{ self, utils::DName, vcore::generate_inode_id, FilePrivateData, IndexNode, InodeFlags, InodeId, InodeMode, @@ -523,7 +523,13 @@ impl LockedExt4Inode { // 设置self_ref guard.self_ref = Arc::downgrade(&inode); - let page_cache = PageCache::new(Some(Arc::downgrade(&inode) as Weak<dyn IndexNode>)); + let backend = Arc::new(SyncPageCacheBackend::new( + Arc::downgrade(&inode) as Weak<dyn IndexNode> + )); + let page_cache = PageCache::new( + Some(Arc::downgrade(&inode) as Weak<dyn IndexNode>), + Some(backend), + ); guard.page_cache = Some(page_cache); drop(guard); diff --git a/kernel/src/filesystem/fat/fs.rs b/kernel/src/filesystem/fat/fs.rs index 8c4dbe990..66454f091 100644 --- a/kernel/src/filesystem/fat/fs.rs +++ b/kernel/src/filesystem/fat/fs.rs @@ -19,7 +19,7 @@ use system_error::SystemError; use crate::driver::base::block::gendisk::GenDisk; use crate::driver::base::device::device_number::DeviceNumber; -use crate::filesystem::page_cache::PageCache; +use crate::filesystem::page_cache::{PageCache, SyncPageCacheBackend}; use crate::filesystem::vfs::utils::DName; use crate::filesystem::vfs::{Magic, SpecialNodeData, SuperBlock}; use crate::ipc::pipe::LockedPipeInode; @@ -267,7 +267,13 @@ impl LockedFATInode { }))); if !inode.0.lock().inode_type.is_dir() { - let page_cache = PageCache::new(Some(Arc::downgrade(&inode) as Weak<dyn IndexNode>)); + let backend = Arc::new(SyncPageCacheBackend::new( + Arc::downgrade(&inode) as Weak<dyn IndexNode> + )); + let page_cache = PageCache::new( + Some(Arc::downgrade(&inode) as Weak<dyn IndexNode>), + Some(backend), + ); inode.0.lock().page_cache = Some(page_cache); } diff --git a/kernel/src/filesystem/page_cache.rs b/kernel/src/filesystem/page_cache.rs index 1480bf366..33f90f612 100644 --- 
a/kernel/src/filesystem/page_cache.rs +++ b/kernel/src/filesystem/page_cache.rs @@ -25,12 +25,61 @@ use crate::{ use crate::{libs::align::page_align_up, mm::page::PageType}; static PAGE_CACHE_ID: AtomicUsize = AtomicUsize::new(0); + +pub trait PageCacheBackend: Send + Sync + core::fmt::Debug { + fn read_page(&self, index: usize, buf: &mut [u8]) -> Result<usize, SystemError>; + fn write_page(&self, index: usize, buf: &[u8]) -> Result<usize, SystemError>; + fn npages(&self) -> usize; +} + +#[derive(Debug)] +pub struct SyncPageCacheBackend { + inode: Weak<dyn IndexNode>, +} + +impl SyncPageCacheBackend { + pub fn new(inode: Weak<dyn IndexNode>) -> Self { + Self { inode } + } +} + +impl PageCacheBackend for SyncPageCacheBackend { + fn read_page(&self, index: usize, buf: &mut [u8]) -> Result<usize, SystemError> { + let inode = self.inode.upgrade().ok_or(SystemError::EIO)?; + inode.read_sync(index * MMArch::PAGE_SIZE, buf) + } + + fn write_page(&self, index: usize, buf: &[u8]) -> Result<usize, SystemError> { + let inode = self.inode.upgrade().ok_or(SystemError::EIO)?; + inode.write_sync(index * MMArch::PAGE_SIZE, buf) + } + + fn npages(&self) -> usize { + let inode = match self.inode.upgrade() { + Some(inode) => inode, + None => return 0, + }; + match inode.metadata() { + Ok(metadata) => { + let size = metadata.size.max(0) as usize; + if size == 0 { + 0 + } else { + (size + MMArch::PAGE_SIZE - 1) >> MMArch::PAGE_SHIFT + } + } + Err(_) => 0, + } + } +} + /// 页面缓存 #[derive(Debug)] pub struct PageCache { id: usize, inner: Mutex<InnerPageCache>, inode: Lazy<Weak<dyn IndexNode>>, + backend: Lazy<Arc<dyn PageCacheBackend>>, unevictable: AtomicBool, } @@ -122,6 +171,48 @@ impl InnerPageCache { Ok(()) } + fn read_pages_from_backend( + &mut self, + start_page_index: usize, + page_num: usize, + ) -> Result<(), SystemError> { + if page_num == 0 { + return Ok(()); + } + + let mut page_buf = vec![0u8; MMArch::PAGE_SIZE * page_num]; + + let backend = self + .page_cache_ref + .upgrade() + .and_then(|page_cache| page_cache.backend()); + + if let Some(backend) = backend { + for i in 0..page_num { + let buf_offset = i * MMArch::PAGE_SIZE; + let read_len = 
backend.read_page( + start_page_index + i, + &mut page_buf[buf_offset..buf_offset + MMArch::PAGE_SIZE], + )?; + if read_len < MMArch::PAGE_SIZE { + page_buf[buf_offset + read_len..buf_offset + MMArch::PAGE_SIZE].fill(0); + } + } + } else { + let inode: Arc<dyn IndexNode> = self + .page_cache_ref + .upgrade() + .unwrap() + .inode + .upgrade() + .unwrap(); + inode.read_sync(start_page_index * MMArch::PAGE_SIZE, page_buf.as_mut())?; + } + + self.create_pages(start_page_index, page_buf.as_mut())?; + Ok(()) + } + /// 创建若干个“零页”并加入 PageCache。 /// /// 与 `create_pages()` 的区别: @@ -257,12 +348,7 @@ impl InnerPageCache { } for (page_index, count) in not_exist { - // TODO 这里使用buffer避免多次读取磁盘,将来引入异步IO直接写入页面,减少内存开销和拷贝 - let mut page_buf = vec![0u8; MMArch::PAGE_SIZE * count]; - - inode.read_sync(page_index * MMArch::PAGE_SIZE, page_buf.as_mut())?; - - self.create_pages(page_index, page_buf.as_mut())?; + self.read_pages_from_backend(page_index, count)?; // 实际要拷贝的内容在文件中的偏移量 let copy_offset = core::cmp::max(page_index * MMArch::PAGE_SIZE, offset); @@ -460,7 +546,10 @@ impl Drop for InnerPageCache { } impl PageCache { - pub fn new(inode: Option<Weak<dyn IndexNode>>) -> Arc<PageCache> { + pub fn new( + inode: Option<Weak<dyn IndexNode>>, + backend: Option<Arc<dyn PageCacheBackend>>, + ) -> Arc<PageCache> { let id = PAGE_CACHE_ID.fetch_add(1, Ordering::SeqCst); Arc::new_cyclic(|weak| Self { id, @@ -472,6 +561,13 @@ } v }, + backend: { + let v: Lazy<Arc<dyn PageCacheBackend>> = Lazy::new(); + if let Some(backend) = backend { + v.init(backend); + } + v + }, unevictable: AtomicBool::new(false), }) } @@ -495,6 +591,18 @@ Ok(()) } + pub fn set_backend(&self, backend: Arc<dyn PageCacheBackend>) -> Result<(), SystemError> { + if self.backend.initialized() { + return Err(SystemError::EINVAL); + } + self.backend.init(backend); + Ok(()) + } + + pub fn backend(&self) -> Option<Arc<dyn PageCacheBackend>> { + self.backend.try_get().cloned() + } + pub fn lock(&self) -> MutexGuard<'_, InnerPageCache> { self.inner.lock() } @@ -505,6 +613,11 @@ self.unevictable.store(unevictable, Ordering::Relaxed); } + pub fn 
read_pages(&self, start_page_index: usize, page_num: usize) -> Result<(), SystemError> { + let mut guard = self.inner.lock(); + guard.read_pages_from_backend(start_page_index, page_num) + } + /// 两阶段读取:持锁收集拷贝项,解锁后拷贝到目标缓冲区,避免用户缺页导致自锁 pub fn read(&self, offset: usize, buf: &mut [u8]) -> Result<usize, SystemError> { let (copies, ret) = { diff --git a/kernel/src/filesystem/tmpfs/mod.rs b/kernel/src/filesystem/tmpfs/mod.rs index 2df80c69b..ef0e56dcd 100644 --- a/kernel/src/filesystem/tmpfs/mod.rs +++ b/kernel/src/filesystem/tmpfs/mod.rs @@ -2,7 +2,7 @@ use core::any::Any; use core::intrinsics::unlikely; use core::sync::atomic::{AtomicU64, Ordering}; -use crate::filesystem::page_cache::PageCache; +use crate::filesystem::page_cache::{PageCache, PageCacheBackend}; use crate::filesystem::vfs::syscall::RenameFlags; use crate::filesystem::vfs::{FileSystemMakerData, FSMAKER}; use crate::libs::rwsem::RwSem; @@ -46,6 +46,45 @@ const TMPFS_BLOCK_SIZE: u64 = 4096; const TMPFS_DEFAULT_MIN_SIZE_BYTES: usize = 16 * 1024 * 1024; // 16MiB const TMPFS_DEFAULT_MAX_SIZE_BYTES: usize = 4 * 1024 * 1024 * 1024; // 4GiB +#[derive(Debug)] +struct TmpfsPageCacheBackend { + inode: Weak<dyn IndexNode>, +} + +impl TmpfsPageCacheBackend { + fn new(inode: Weak<dyn IndexNode>) -> Self { + Self { inode } + } +} + +impl PageCacheBackend for TmpfsPageCacheBackend { + fn read_page(&self, _index: usize, _buf: &mut [u8]) -> Result<usize, SystemError> { + Ok(0) + } + + fn write_page(&self, _index: usize, buf: &[u8]) -> Result<usize, SystemError> { + Ok(buf.len()) + } + + fn npages(&self) -> usize { + let inode = match self.inode.upgrade() { + Some(inode) => inode, + None => return 0, + }; + match inode.metadata() { + Ok(metadata) => { + let size = metadata.size.max(0) as usize; + if size == 0 { + 0 + } else { + (size + MMArch::PAGE_SIZE - 1) >> MMArch::PAGE_SHIFT + } + } + Err(_) => 0, + } + } +} + fn tmpfs_move_entry_between_dirs( src_dir: &mut TmpfsInode, dst_dir: &mut TmpfsInode, @@ -761,7 +800,13 @@ impl IndexNode for LockedTmpfsInode { // 目前 VFS 使用 read_at/write_at 来读写 symlink 
内容(readlink/symlink 语义), // 因此 symlink 也必须有 page_cache 后端,否则会在 write_at/read_at 返回 EIO。 if file_type == FileType::File || file_type == FileType::SymLink { - let pc = PageCache::new(Some(Arc::downgrade(&result) as Weak<dyn IndexNode>)); + let backend = Arc::new(TmpfsPageCacheBackend::new( + Arc::downgrade(&result) as Weak<dyn IndexNode> + )); + let pc = PageCache::new( + Some(Arc::downgrade(&result) as Weak<dyn IndexNode>), + Some(backend), + ); pc.set_unevictable(true); result.0.lock().page_cache = Some(pc); } diff --git a/kernel/src/mm/fault.rs b/kernel/src/mm/fault.rs index fe6967854..944eefa08 100644 --- a/kernel/src/mm/fault.rs +++ b/kernel/src/mm/fault.rs @@ -699,31 +699,17 @@ impl PageFaultHandler { // 直接将PageCache中的页面作为要映射的页面 pfm.page = Some(page.clone()); } else { - // TODO 同步预读 - // 涉及磁盘IO,返回标志为VM_FAULT_MAJOR - ret = VmFaultReason::VM_FAULT_MAJOR; - let mut buffer = vec![0u8; MMArch::PAGE_SIZE]; - match file.pread( - backing_pgoff * MMArch::PAGE_SIZE, - MMArch::PAGE_SIZE, - buffer.as_mut_slice(), - ) { - Ok(read_len) => { - // 超出文件末尾,返回SIGBUS而不是panic - if read_len == 0 { - return VmFaultReason::VM_FAULT_SIGBUS; - } - } - Err(e) => { - log::warn!( - "filemap_fault: pread failed at pgoff {}, err {:?}", - backing_pgoff, - e - ); + if let Ok(md) = file.inode().metadata() { + let size = md.size.max(0) as usize; + if size == 0 || backing_pgoff.saturating_mul(MMArch::PAGE_SIZE) >= size { return VmFaultReason::VM_FAULT_SIGBUS; } } - drop(buffer); + + ret = VmFaultReason::VM_FAULT_MAJOR; + if page_cache.read_pages(backing_pgoff, 1).is_err() { + return VmFaultReason::VM_FAULT_SIGBUS; + } let page = page_cache.lock().get_page(backing_pgoff); if let Some(page) = page { diff --git a/kernel/src/mm/page.rs b/kernel/src/mm/page.rs index b4176e3b4..1c525b11b 100644 --- a/kernel/src/mm/page.rs +++ b/kernel/src/mm/page.rs @@ -398,6 +398,7 @@ impl PageReclaimer { }; let paddr = guard.phys_address(); let inode = page_cache.inode().clone().unwrap().upgrade().unwrap(); + let backend = page_cache.backend(); for vma in 
guard.vma_set() { let address_space = vma.lock().address_space().and_then(|x| x.upgrade()); @@ -437,17 +438,22 @@ impl PageReclaimer { }; if len > 0 { - let r = inode.write_direct( - page_start, - len, - unsafe { - core::slice::from_raw_parts( - MMArch::phys_2_virt(paddr).unwrap().data() as *const u8, - len, - ) - }, - Mutex::new(FilePrivateData::Unused).lock(), - ); + let data = unsafe { + core::slice::from_raw_parts( + MMArch::phys_2_virt(paddr).unwrap().data() as *const u8, + len, + ) + }; + let r = if let Some(backend) = backend { + backend.write_page(page_index, data) + } else { + inode.write_direct( + page_start, + len, + data, + Mutex::new(FilePrivateData::Unused).lock(), + ) + }; if let Err(e) = r { log::error!( "page writeback failed: offset={}, len={}, err={:?}", diff --git a/kernel/src/mm/readahead.rs b/kernel/src/mm/readahead.rs index 6349c3f9d..59382a53a 100644 --- a/kernel/src/mm/readahead.rs +++ b/kernel/src/mm/readahead.rs @@ -119,23 +119,20 @@ impl<'a> ReadaheadControl<'a> { let ranges = merge_ranges(&missing_pages); let mut total_read = 0; + let file_size = self.inode.metadata()?.size.max(0) as usize; + if file_size == 0 { + return Ok(0); + } for (page_index, count) in ranges { - let mut page_buf = alloc::vec![0u8; MMArch::PAGE_SIZE * count]; - let offset = page_index << MMArch::PAGE_SHIFT; - let read_len = self.inode.read_sync(offset, &mut page_buf)?; - - if read_len == 0 { + let start_offset = page_index * MMArch::PAGE_SIZE; + let end_offset = core::cmp::min((page_index + count) * MMArch::PAGE_SIZE, file_size); + if end_offset <= start_offset { continue; } - - page_buf.truncate(read_len); - let actual_page_count = (read_len + MMArch::PAGE_SIZE - 1) >> MMArch::PAGE_SHIFT; - - let mut page_cache_guard = page_cache.lock(); - page_cache_guard.create_pages(page_index, &page_buf)?; - drop(page_cache_guard); - + let actual_page_count = + (end_offset - start_offset + MMArch::PAGE_SIZE - 1) >> MMArch::PAGE_SHIFT; + page_cache.read_pages(page_index, 
actual_page_count)?; total_read += actual_page_count; } diff --git a/kernel/src/perf/bpf.rs b/kernel/src/perf/bpf.rs index 96d6c54fb..085a74f1c 100644 --- a/kernel/src/perf/bpf.rs +++ b/kernel/src/perf/bpf.rs @@ -235,7 +235,7 @@ impl BpfPerfEvent { data: SpinLock::new(BpfPerfEventData { enabled: false, mmap_page: RingPage::empty(), - page_cache: PageCache::new(None), + page_cache: PageCache::new(None, None), offset: 0, }), }