Refactor page cache implementation
parent b0407dd517
commit 5de0d191c1

@@ -1,505 +1,133 @@
// SPDX-License-Identifier: MPL-2.0

#![expect(dead_code)]

use core::{
    ops::Range,
    sync::atomic::{AtomicU8, Ordering},
    ops::{Deref, Range},
    sync::atomic::{AtomicBool, AtomicU8, Ordering},
};

use align_ext::AlignExt;
use aster_block::bio::{BioStatus, BioWaiter};
use lru::LruCache;
use ostd::{
    impl_untyped_frame_meta_for,
    mm::{Frame, FrameAllocOptions, UFrame, VmIoFill},
    mm::{Frame, FrameAllocOptions, HasPaddr},
    sync::WaitQueue,
};

use crate::{
    prelude::*,
    vm::vmo::{Pager, Vmo, VmoFlags, VmoOptions, get_page_idx_range},
};
use crate::{prelude::*, vm::vmo::Vmo};

pub struct PageCache {
    pages: Arc<Vmo>,
    manager: Arc<PageCacheManager>,
}
/// The page cache type.
///
/// The page cache is implemented using a [`Vmo`]. Typically, a page cache for
/// a disk-based file system (e.g., ext2, exfat) is a **disk-backed VMO**, which
/// is associated with a [`PageCacheBackend`] that provides I/O operations to read
/// from and write to the underlying block device. In contrast, for purely in-memory
/// file systems (e.g., ramfs), the page cache is an **anonymous VMO** — it has no
/// backend and its pages exist only in RAM.
pub type PageCache = Arc<Vmo>;
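// Illustrative sketch (not part of the diff): obtaining a disk-backed page
// cache after this refactor. `Inode` is a hypothetical backend type that is
// assumed to implement `PageCacheBackend`, and `PageCacheOps` (declared below)
// is assumed to be implemented for the `Vmo` behind this alias.
fn page_cache_for(inode: &Arc<Inode>, size: usize) -> Result<PageCache> {
    // The weak reference coerces to `Weak<dyn PageCacheBackend>` at the call
    // site; a purely in-memory file system would instead use an anonymous `Vmo`.
    Vmo::with_capacity(size, Arc::downgrade(inode))
}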

impl PageCache {
    /// Creates an empty page cache associated with a new backend.
    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Result<Self> {
        let manager = Arc::new(PageCacheManager::new(backend));
        let pages = VmoOptions::new(0)
            .flags(VmoFlags::RESIZABLE)
            .pager(manager.clone())
            .alloc()?;
        Ok(Self { pages, manager })
    }
/// A trait for page cache operations.
///
/// The page cache serves as an in-memory buffer between the file system and
/// block devices, caching frequently accessed file data to improve performance.
pub trait PageCacheOps {
    /// Creates a new page cache with the specified capacity.
    fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Arc<Self>>;

    /// Creates a page cache associated with an existing backend.
    /// Resizes the page cache to the target size.
    ///
    /// The `capacity` is the initial cache size required by the backend.
    /// This size usually corresponds to the size of the backend.
    pub fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Self> {
        let manager = Arc::new(PageCacheManager::new(backend));
        let pages = VmoOptions::new(capacity)
            .flags(VmoFlags::RESIZABLE)
            .pager(manager.clone())
            .alloc()?;
        Ok(Self { pages, manager })
    }

    /// Returns the Vmo object.
    pub fn pages(&self) -> &Arc<Vmo> {
        &self.pages
    }

    /// Evicts the data within a specified range from the page cache and persists
    /// it to the backend.
    pub fn evict_range(&self, range: Range<usize>) -> Result<()> {
        self.manager.evict_range(range)
    }

    /// Evicts the data within a specified range from the page cache without persisting
    /// it to the backend.
    pub fn discard_range(&self, range: Range<usize>) {
        self.manager.discard_range(range)
    }

    /// Returns the backend.
    pub fn backend(&self) -> Arc<dyn PageCacheBackend> {
        self.manager.backend()
    }

    /// Resizes the current page cache to a target size.
    pub fn resize(&self, new_size: usize) -> Result<()> {
        // If the new size is smaller and not page-aligned,
        // first zero the gap between the new size and the
        // next page boundary (or the old size), if such a gap exists.
        let old_size = self.pages.size();
        if old_size > new_size && !new_size.is_multiple_of(PAGE_SIZE) {
            let gap_size = old_size.min(new_size.align_up(PAGE_SIZE)) - new_size;
            if gap_size > 0 {
                self.fill_zeros(new_size..new_size + gap_size)?;
            }
        }
        self.pages.resize(new_size)
    }
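
    // Worked example of the gap computation above (illustrative, PAGE_SIZE = 4096):
    // shrinking from old_size = 10_000 to new_size = 4_100 leaves a partially
    // truncated page whose tail must be zeroed so that a later grow does not
    // expose stale bytes:
    //
    //     gap_size = min(10_000, align_up(4_100, 4096)) - 4_100
    //              = min(10_000, 8_192) - 4_100
    //              = 4_092            // bytes 4_100..8_192 are zero-filled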

    /// Fills the specified range of the page cache with zeros.
    pub fn fill_zeros(&self, range: Range<usize>) -> Result<()> {
        if range.is_empty() {
            return Ok(());
        }
        let (start, end) = (range.start, range.end);

        // Write zeros to the first partial page if any
        let first_page_end = start.align_up(PAGE_SIZE);
        if first_page_end > start {
            let zero_len = first_page_end.min(end) - start;
            self.pages().fill_zeros(start, zero_len)?;
        }

        // Write zeros to the last partial page if any
        let last_page_start = end.align_down(PAGE_SIZE);
        if last_page_start < end && last_page_start >= start {
            let zero_len = end - last_page_start;
            self.pages().fill_zeros(last_page_start, zero_len)?;
        }

        for offset in (first_page_end..last_page_start).step_by(PAGE_SIZE) {
            self.pages().fill_zeros(offset, PAGE_SIZE)?;
        }
        Ok(())
    }
}
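
// Illustrative example of the split performed by `fill_zeros` above
// (PAGE_SIZE = 4096): zeroing the range 1_000..9_000 is handled as three pieces:
//
//     head:  1_000..4_096   partial first page, zeroed in place
//     tail:  8_192..9_000   partial last page, zeroed in place
//     full:  4_096..8_192   whole pages, zeroed one page at a time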

impl Drop for PageCache {
    fn drop(&mut self) {
        // TODO:
        // The default destruction procedure exhibits slow performance.
        // In contrast, resizing the `VMO` to zero greatly accelerates the process.
        // We need to find out the underlying cause of this discrepancy.
        let _ = self.pages.resize(0);
    }
}

impl Debug for PageCache {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("PageCache")
            .field("size", &self.pages.size())
            .field("manager", &self.manager)
            .finish()
    }
}

struct ReadaheadWindow {
    /// The window.
    window: Range<usize>,
    /// Look ahead position in the current window, where the readahead is triggered.
    /// TODO: We set the `lookahead_index` to the start of the window for now.
    /// This should be adjustable by the user.
    lookahead_index: usize,
}

impl ReadaheadWindow {
    pub fn new(window: Range<usize>) -> Self {
        let lookahead_index = window.start;
        Self {
            window,
            lookahead_index,
        }
    }

    /// Gets the next readahead window.
    /// Most of the time, we push the window forward and double its size.
    /// The `new_size` will be rounded up to page boundaries. If the new size is smaller
    /// than the current size, pages that fall entirely within the truncated range will be
    /// decommitted (freed). For the page that is only partially truncated (i.e., the page
    /// containing the new boundary), the truncated portion will be filled with zeros instead.
    ///
    /// The `max_size` is the maximum size of the window.
    /// The `max_page` is the total page number of the file, and the window should not
    /// exceed the scope of the file.
    pub fn next(&self, max_size: usize, max_page: usize) -> Self {
        let new_start = self.window.end;
        let cur_size = self.window.end - self.window.start;
        let new_size = (cur_size * 2).min(max_size).min(max_page - new_start);
        Self {
            window: new_start..(new_start + new_size),
            lookahead_index: new_start,
        }
    }
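
    // Illustrative progression of the readahead window (INIT_WINDOW_SIZE = 4,
    // DEFAULT_MAX_SIZE = 32, max_page = 100): a first miss at page 5 creates the
    // window 6..10; each subsequent `next()` pushes the window forward and
    // doubles it, clamped by `max_size` and the end of the file:
    //
    //     6..10  ->  10..18  ->  18..34  ->  34..66  ->  66..98  ->  98..100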
    /// The `old_size` represents the actual used range of the page cache (i.e., the logical
    /// size of the cached content), which may differ from the total capacity of the page cache.
    /// It is used to determine the boundary of the previously valid data so that only the
    /// discarded logical range (from `new_size` to `old_size`) within a partially truncated
    /// page needs to be zero-filled.
    fn resize(&self, new_size: usize, old_size: usize) -> Result<()>;

    pub fn lookahead_index(&self) -> usize {
        self.lookahead_index
    }
    /// Flushes the dirty pages in the specified range to the backend storage.
    ///
    /// This operation ensures that any modifications made to the pages within the given
    /// range are persisted to the underlying storage device or file system.
    ///
    /// If the given range exceeds the current size of the page cache, only the pages within
    /// the valid range will be flushed.
    fn flush_range(&self, range: Range<usize>) -> Result<()>;

    pub fn readahead_index(&self) -> usize {
        self.window.end
    }
    /// Discards the pages within the specified range from the page cache.
    ///
    /// This operation will first **flush** the dirty pages in the range to the backend storage,
    /// ensuring that any modifications are persisted. After flushing, the pages are removed
    /// from the page cache. This is useful for invalidating cached data that is no longer needed
    /// or has become stale.
    fn discard_range(&self, range: Range<usize>) -> Result<()>;

    pub fn readahead_range(&self) -> Range<usize> {
        self.window.clone()
    }
}

struct ReadaheadState {
    /// Current readahead window.
    ra_window: Option<ReadaheadWindow>,
    /// Maximum window size.
    max_size: usize,
    /// The last page visited, used to determine sequential I/O.
    prev_page: Option<usize>,
    /// Readahead requests waiter.
    waiter: BioWaiter,
}

impl ReadaheadState {
    const INIT_WINDOW_SIZE: usize = 4;
    const DEFAULT_MAX_SIZE: usize = 32;

    pub fn new() -> Self {
        Self {
            ra_window: None,
            max_size: Self::DEFAULT_MAX_SIZE,
            prev_page: None,
            waiter: BioWaiter::new(),
        }
    }

    /// Sets the maximum readahead window size.
    pub fn set_max_window_size(&mut self, size: usize) {
        self.max_size = size;
    }

    fn is_sequential(&self, idx: usize) -> bool {
        if let Some(prev) = self.prev_page {
            idx == prev || idx == prev + 1
        } else {
            false
        }
    }

    /// The number of bio requests in waiter.
    /// This number will be zero if there is no previous readahead.
    pub fn request_number(&self) -> usize {
        self.waiter.nreqs()
    }

    /// Checks for the previous readahead.
    /// Returns true if the previous readahead has been completed.
    pub fn prev_readahead_is_completed(&self) -> bool {
        let nreqs = self.request_number();
        if nreqs == 0 {
            return false;
        }

        for i in 0..nreqs {
            if self.waiter.status(i) == BioStatus::Submit {
                return false;
            }
        }
        true
    }

    /// Waits for the previous readahead.
    pub fn wait_for_prev_readahead(
        &mut self,
        pages: &mut MutexGuard<LruCache<usize, CachePage>>,
    ) -> Result<()> {
        if matches!(self.waiter.wait(), Some(BioStatus::Complete)) {
            let Some(window) = &self.ra_window else {
                return_errno!(Errno::EINVAL)
            };
            for idx in window.readahead_range() {
                if let Some(page) = pages.get_mut(&idx) {
                    page.store_state(PageState::UpToDate);
                }
            }
            self.waiter.clear();
        } else {
            return_errno!(Errno::EIO)
        }

        Ok(())
    }

    /// Determines whether a new readahead should be performed.
    /// We only consider readahead for sequential I/O now.
    /// There should be at most one in-progress readahead.
    pub fn should_readahead(&self, idx: usize, max_page: usize) -> bool {
        if self.request_number() == 0 && self.is_sequential(idx) {
            if let Some(cur_window) = &self.ra_window {
                let trigger_readahead =
                    idx == cur_window.lookahead_index() || idx == cur_window.readahead_index();
                let next_window_exist = cur_window.readahead_range().end < max_page;
                trigger_readahead && next_window_exist
            } else {
                let new_window_start = idx + 1;
                new_window_start < max_page
            }
        } else {
            false
        }
    }
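
    // Illustrative trigger (with `lookahead_index` currently fixed to the window
    // start): given the window 10..18, a sequential access to page 10 (the
    // lookahead index) or page 18 (the readahead index) starts the next readahead,
    // provided no readahead is already in flight and pages beyond the window exist.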

    /// Sets up the new readahead window.
    pub fn setup_window(&mut self, idx: usize, max_page: usize) {
        let new_window = if let Some(cur_window) = &self.ra_window {
            cur_window.next(self.max_size, max_page)
        } else {
            let start_idx = idx + 1;
            let init_size = Self::INIT_WINDOW_SIZE.min(self.max_size);
            let end_idx = (start_idx + init_size).min(max_page);
            ReadaheadWindow::new(start_idx..end_idx)
        };
        self.ra_window = Some(new_window);
    }

    /// Conducts the new readahead.
    /// Sends the relevant read request and sets the relevant page in the page cache to `Uninit`.
    pub fn conduct_readahead(
        &mut self,
        pages: &mut MutexGuard<LruCache<usize, CachePage>>,
        backend: Arc<dyn PageCacheBackend>,
    ) -> Result<()> {
        let Some(window) = &self.ra_window else {
            return_errno!(Errno::EINVAL)
        };
        for async_idx in window.readahead_range() {
            let mut async_page = CachePage::alloc_uninit()?;
            let pg_waiter = backend.read_page_async(async_idx, &async_page)?;
            if pg_waiter.nreqs() > 0 {
                self.waiter.concat(pg_waiter);
            } else {
                // Some backends (e.g. RamFs) do not issue requests, but fill the page directly.
                async_page.store_state(PageState::UpToDate);
            }
            pages.put(async_idx, async_page);
        }
        Ok(())
    }

    /// Sets the last page visited.
    pub fn set_prev_page(&mut self, idx: usize) {
        self.prev_page = Some(idx);
    }
}

struct PageCacheManager {
    pages: Mutex<LruCache<usize, CachePage>>,
    backend: Weak<dyn PageCacheBackend>,
    ra_state: Mutex<ReadaheadState>,
}

impl PageCacheManager {
    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Self {
        Self {
            pages: Mutex::new(LruCache::unbounded()),
            backend,
            ra_state: Mutex::new(ReadaheadState::new()),
        }
    }

    pub fn backend(&self) -> Arc<dyn PageCacheBackend> {
        self.backend.upgrade().unwrap()
    }

    // Discard pages without writing them back to disk.
    pub fn discard_range(&self, range: Range<usize>) {
        let page_idx_range = get_page_idx_range(&range);
        let mut pages = self.pages.lock();
        for idx in page_idx_range {
            pages.pop(&idx);
        }
    }

    pub fn evict_range(&self, range: Range<usize>) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);

        let mut bio_waiter = BioWaiter::new();
        let mut pages = self.pages.lock();
        let backend = self.backend();
        let backend_npages = backend.npages();
        for idx in page_idx_range.start..page_idx_range.end {
            if let Some(page) = pages.peek(&idx)
                && page.load_state() == PageState::Dirty
                && idx < backend_npages
            {
                let waiter = backend.write_page_async(idx, page)?;
                bio_waiter.concat(waiter);
            }
        }

        if !matches!(bio_waiter.wait(), Some(BioStatus::Complete)) {
            // Do not allow partial failure
            return_errno!(Errno::EIO);
        }

        for (_, page) in pages
            .iter_mut()
            .filter(|(idx, _)| page_idx_range.contains(*idx))
        {
            page.store_state(PageState::UpToDate);
        }
        Ok(())
    }

    fn ondemand_readahead(&self, idx: usize) -> Result<UFrame> {
        let mut pages = self.pages.lock();
        let mut ra_state = self.ra_state.lock();
        let backend = self.backend();
        // Checks for the previous readahead.
        if ra_state.prev_readahead_is_completed() {
            ra_state.wait_for_prev_readahead(&mut pages)?;
        }
        // There are three possible conditions that could be encountered upon reaching here.
        // 1. The requested page is ready for read in page cache.
        // 2. The requested page is in previous readahead range, not ready for now.
        // 3. The requested page is on disk, need a sync read operation here.
        let frame = if let Some(page) = pages.get(&idx) {
            // Cond 1 & 2.
            if let PageState::Uninit = page.load_state() {
                // Cond 2: We should wait for the previous readahead.
                // If there is no previous readahead, an error must have occurred somewhere.
                assert!(ra_state.request_number() != 0);
                ra_state.wait_for_prev_readahead(&mut pages)?;
                pages.get(&idx).unwrap().clone()
            } else {
                // Cond 1.
                page.clone()
            }
        } else {
            // Cond 3.
            // Conducts the sync read operation.
            let page = if idx < backend.npages() {
                let mut page = CachePage::alloc_uninit()?;
                backend.read_page(idx, &page)?;
                page.store_state(PageState::UpToDate);
                page
            } else {
                CachePage::alloc_zero(PageState::Uninit)?
            };
            let frame = page.clone();
            pages.put(idx, page);
            frame
        };
        if ra_state.should_readahead(idx, backend.npages()) {
            ra_state.setup_window(idx, backend.npages());
            ra_state.conduct_readahead(&mut pages, backend)?;
        }
        ra_state.set_prev_page(idx);
        Ok(frame.into())
    }
}

impl Debug for PageCacheManager {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("PageCacheManager")
            .field("pages", &self.pages.lock())
            .finish()
    }
}

impl Pager for PageCacheManager {
    fn commit_page(&self, idx: usize) -> Result<UFrame> {
        self.ondemand_readahead(idx)
    }

    fn update_page(&self, idx: usize) -> Result<()> {
        let mut pages = self.pages.lock();
        if let Some(page) = pages.get_mut(&idx) {
            page.store_state(PageState::Dirty);
        } else {
            warn!("The page {} is not in page cache", idx);
        }

        Ok(())
    }

    fn decommit_page(&self, idx: usize) -> Result<()> {
        let page_result = self.pages.lock().pop(&idx);
        if let Some(page) = page_result
            && let PageState::Dirty = page.load_state()
        {
            let Some(backend) = self.backend.upgrade() else {
                return Ok(());
            };
            if idx < backend.npages() {
                backend.write_page(idx, &page)?;
            }
        }

        Ok(())
    }

    fn commit_overwrite(&self, idx: usize) -> Result<UFrame> {
        if let Some(page) = self.pages.lock().get(&idx) {
            return Ok(page.clone().into());
        }

        let page = CachePage::alloc_uninit()?;
        Ok(self.pages.lock().get_or_insert(idx, || page).clone().into())
    }
    /// Fills the specified range of the page cache with zeros.
    fn fill_zeros(&self, range: Range<usize>) -> Result<()>;
}

/// A page in the page cache.
pub type CachePage = Frame<CachePageMeta>;

const PAGE_WAIT_QUEUE_MASK: usize = 0xff;
const PAGE_WAIT_QUEUE_NUM: usize = PAGE_WAIT_QUEUE_MASK + 1;

/// Global array of wait queues for page cache operations.
///
/// Each wait queue in this array handles wait/wake operations for a subset of cache pages.
/// The queue for a specific page is selected using: `PAGE_WAIT_QUEUES[page.paddr() & PAGE_WAIT_QUEUE_MASK]`.
///
/// This approach avoids the overhead of per-page wait queues while still providing
/// reasonable concurrency through hashing.
static PAGE_WAIT_QUEUES: [WaitQueue; PAGE_WAIT_QUEUE_NUM] =
    [const { WaitQueue::new() }; PAGE_WAIT_QUEUE_NUM];

/// Metadata for a page in the page cache.
#[derive(Debug)]
pub struct CachePageMeta {
    pub state: AtomicPageState,
    /// The current state of the page (uninit, up-to-date, or dirty).
    state: AtomicPageState,
    /// This bit acts as a mutex for the corresponding page.
    ///
    /// When this bit is set, the holder has the exclusive right to perform critical
    /// state transitions (e.g., preparing for I/O).
    lock: AtomicBool,
    // TODO: Add a reverse mapping from the page to VMO for eviction.
}

impl Default for CachePageMeta {
    fn default() -> Self {
        Self {
            state: AtomicPageState::new(PageState::Uninit),
            lock: AtomicBool::new(false),
        }
    }
}

impl_untyped_frame_meta_for!(CachePageMeta);

pub trait CachePageExt {
pub trait CachePageExt: Sized {
    /// Gets the metadata associated with the cache page.
    fn metadata(&self) -> &CachePageMeta;

    /// Gets the wait queue associated with the cache page.
    fn wait_queue(&self) -> &'static WaitQueue;

    /// Tries to lock the cache page.
    fn try_lock(&self) -> Option<LockedCachePage>;

    /// Locks the cache page, blocking until the lock is acquired.
    fn lock(self) -> LockedCachePage;

    /// Ensures the page is initialized, calling `init_fn` if necessary.
    fn ensure_init(&self, init_fn: impl Fn(LockedCachePage) -> Result<()>) -> Result<()>;

    /// Allocates a new cache page whose content and state are uninitialized.
    fn alloc_uninit() -> Result<CachePage> {
        let meta = CachePageMeta {
            state: AtomicPageState::new(PageState::Uninit),
            lock: AtomicBool::new(false),
        };
        let page = FrameAllocOptions::new()
            .zeroed(false)

@@ -508,9 +136,10 @@ pub trait CachePageExt {
    }

    /// Allocates a new zeroed cache page with the wanted state.
    fn alloc_zero(state: PageState) -> Result<CachePage> {
    fn alloc_zero() -> Result<CachePage> {
        let meta = CachePageMeta {
            state: AtomicPageState::new(state),
            state: AtomicPageState::new(PageState::UpToDate),
            lock: AtomicBool::new(false),
        };
        let page = FrameAllocOptions::new()
            .zeroed(true)

@@ -518,14 +147,19 @@ pub trait CachePageExt {
        Ok(page)
    }

    /// Loads the current state of the cache page.
    fn load_state(&self) -> PageState {
        self.metadata().state.load(Ordering::Relaxed)
    fn is_uninit(&self) -> bool {
        matches!(
            self.metadata().state.load(Ordering::Acquire),
            PageState::Uninit
        )
    }

    /// Stores a new state for the cache page.
    fn store_state(&mut self, new_state: PageState) {
        self.metadata().state.store(new_state, Ordering::Relaxed);
    /// Checks if the page is dirty.
    fn is_dirty(&self) -> bool {
        matches!(
            self.metadata().state.load(Ordering::Acquire),
            PageState::Dirty
        )
    }
}

@@ -533,8 +167,124 @@ impl CachePageExt for CachePage {
    fn metadata(&self) -> &CachePageMeta {
        self.meta()
    }

    fn wait_queue(&self) -> &'static WaitQueue {
        &PAGE_WAIT_QUEUES[self.paddr() & PAGE_WAIT_QUEUE_MASK]
    }

    fn try_lock(&self) -> Option<LockedCachePage> {
        let wait_queue = self.wait_queue();
        self.metadata()
            .lock
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_ok()
            .then(|| LockedCachePage::new(self.clone(), wait_queue))
    }

    fn lock(self) -> LockedCachePage {
        let wait_queue = self.wait_queue();
        self.wait_queue().wait_until(|| {
            self.metadata()
                .lock
                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
                .ok()
        });
        LockedCachePage::new(self, wait_queue)
    }

    fn ensure_init(&self, init_fn: impl Fn(LockedCachePage) -> Result<()>) -> Result<()> {
        // Fast path: if the page is already initialized, return immediately without waiting.
        if !self.is_uninit() {
            return Ok(());
        }

        let lock_page = self.clone().lock();
        // Check again after acquiring the lock to avoid duplicate initialization.
        if !lock_page.is_uninit() {
            return Ok(());
        }

        init_fn(lock_page)
    }
}
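
// Illustrative sketch (assumed context): filling an uninitialized cache page
// from its backend exactly once, even when several threads race on the same
// page. Losers of the race block in `lock()` and then observe `!is_uninit()`.
fn init_from_backend(
    page: &CachePage,
    idx: usize,
    backend: &dyn PageCacheBackend,
) -> Result<()> {
    page.ensure_init(|locked_page| backend.read_page(idx, locked_page))
}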

/// A locked cache page.
///
/// The locked page has the exclusive right to perform critical
/// state transitions (e.g., preparing for I/O).
pub struct LockedCachePage {
    page: Option<CachePage>,
    wait_queue: &'static WaitQueue,
}

impl Debug for LockedCachePage {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("LockedCachePage")
            .field("page", &self.page)
            .finish()
    }
}

impl LockedCachePage {
    fn new(page: CachePage, wait_queue: &'static WaitQueue) -> Self {
        Self {
            page: Some(page),
            wait_queue,
        }
    }

    /// Unlocks the page and returns the underlying cache page.
    pub fn unlock(mut self) -> CachePage {
        let page = self.page.take().expect("page already taken");
        page.metadata().lock.store(false, Ordering::Release);
        self.wait_queue.wake_all();
        page
    }

    fn page(&self) -> &CachePage {
        self.page.as_ref().expect("page already taken")
    }

    /// Marks the page as up-to-date.
    ///
    /// This indicates that the page's contents are synchronized with disk
    /// and can be safely read.
    pub fn set_up_to_date(&self) {
        self.page()
            .metadata()
            .state
            .store(PageState::UpToDate, Ordering::Relaxed);
    }

    /// Marks the page as dirty.
    ///
    /// This indicates that the page has been modified and needs to be
    /// written back to disk eventually.
    pub fn set_dirty(&self) {
        self.metadata()
            .state
            .store(PageState::Dirty, Ordering::Relaxed);
    }
}

impl Deref for LockedCachePage {
    type Target = CachePage;

    fn deref(&self) -> &Self::Target {
        self.page.as_ref().expect("page already taken")
    }
}

impl Drop for LockedCachePage {
    fn drop(&mut self) {
        if let Some(page) = &self.page {
            page.metadata().lock.store(false, Ordering::Release);
            self.wait_queue.wake_all();
        }
    }
}
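
// Illustrative sketch of the lock protocol: the bit is taken with `lock()`
// (or `try_lock()`), the critical state transition is performed, and the bit
// is released either explicitly via `unlock()` or implicitly by `Drop` on the
// error path, waking any waiters parked on the shared wait queue.
fn mark_dirty_exclusively(page: CachePage) -> CachePage {
    let locked = page.lock();
    locked.set_dirty();
    locked.unlock()
}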

/// The state of a page in the page cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PageState {

@@ -580,25 +330,26 @@ impl AtomicPageState {
/// This trait represents the backend for the page cache.
pub trait PageCacheBackend: Sync + Send {
    /// Reads a page from the backend asynchronously.
    fn read_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn read_page_async(&self, idx: usize, frame: LockedCachePage) -> Result<BioWaiter>;
    /// Writes a page to the backend asynchronously.
    fn write_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn write_page_async(&self, idx: usize, frame: LockedCachePage) -> Result<BioWaiter>;
    /// Returns the number of pages in the backend.
    fn npages(&self) -> usize;
}
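
// Illustrative sketch (hypothetical type, using the new `LockedCachePage`
// signatures): a trivial backend that "completes" immediately, mirroring the
// RamFs case mentioned earlier where no bio request is issued and the page
// state is published directly. Dropping `frame` releases the page lock.
struct TrivialBackend {
    npages: usize,
}

impl PageCacheBackend for TrivialBackend {
    fn read_page_async(&self, _idx: usize, frame: LockedCachePage) -> Result<BioWaiter> {
        // A real backend would issue a bio that fills `frame` here.
        frame.set_up_to_date();
        Ok(BioWaiter::new())
    }

    fn write_page_async(&self, _idx: usize, _frame: LockedCachePage) -> Result<BioWaiter> {
        Ok(BioWaiter::new())
    }

    fn npages(&self) -> usize {
        self.npages
    }
}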

impl dyn PageCacheBackend {
    /// Reads a page from the backend synchronously.
    fn read_page(&self, idx: usize, frame: &CachePage) -> Result<()> {
        let waiter = self.read_page_async(idx, frame)?;
    pub fn read_page(&self, idx: usize, page: LockedCachePage) -> Result<()> {
        let waiter = self.read_page_async(idx, page)?;
        match waiter.wait() {
            Some(BioStatus::Complete) => Ok(()),
            _ => return_errno!(Errno::EIO),
        }
    }

    /// Writes a page to the backend synchronously.
    fn write_page(&self, idx: usize, frame: &CachePage) -> Result<()> {
        let waiter = self.write_page_async(idx, frame)?;
    pub fn write_page(&self, idx: usize, page: LockedCachePage) -> Result<()> {
        let waiter = self.write_page_async(idx, page)?;
        match waiter.wait() {
            Some(BioStatus::Complete) => Ok(()),
            _ => return_errno!(Errno::EIO),
@@ -6,34 +6,45 @@
//! Virtual Memory Objects (VMOs).

use core::{
    ops::Range,
    cell::RefCell,
    ops::{Deref, Range},
    sync::atomic::{AtomicIsize, AtomicUsize, Ordering},
};

use align_ext::AlignExt;
use ostd::{
    mm::{
        FrameAllocOptions, UFrame, VmIo, VmIoFill, VmReader, VmWriter, io_util::HasVmReaderWriter,
    },
    mm::{VmIo, VmIoFill, VmReader, VmWriter, io_util::HasVmReaderWriter},
    task::disable_preempt,
};
use xarray::{Cursor, LockedXArray, XArray};

use crate::prelude::*;
use crate::{
    fs::utils::{CachePage, CachePageExt, LockedCachePage, PageCacheBackend},
    prelude::*,
};

mod options;
mod pager;
mod page_cache;

pub use options::VmoOptions;
pub use pager::Pager;

/// Virtual Memory Objects (VMOs) are a type of capability that represents a
/// range of memory pages.
/// Virtual Memory Objects (VMOs) represent contiguous ranges of virtual memory pages.
///
/// Broadly speaking, there are two types of VMO:
/// 1. File-backed VMO: the VMO backed by a file and resides in the page cache,
///    which includes a [`Pager`] to provide it with actual pages.
/// 2. Anonymous VMO: the VMO without a file backup, which does not have a `Pager`.
/// VMOs serve as the fundamental building blocks for memory management in Asterinas,
/// providing a unified interface for both anonymous (RAM-backed) and disk-backed memory.
///
/// # Types of VMOs
///
/// There are two primary types of VMOs, distinguished by their backing storage:
///
/// 1. **Anonymous VMO**: Backed purely by RAM with no persistent storage. Pages are
///    initially zero-filled and exist only in memory. These are typically used for
///    heap allocations, anonymous mappings, and stack memory.
///
/// 2. **Disk-backed VMO**: Associated with a disk-backed file through a [`PageCacheBackend`].
///    Pages are lazily loaded from the file on first access and can be written back
///    to storage. These VMOs integrate with the page cache for efficient file I/O
///    and memory-mapped files.
///
/// # Features
///

@@ -44,10 +55,25 @@ pub use pager::Pager;
/// * **Device driver support.** If specified upon creation, VMOs will be
///   backed by physically contiguous memory pages starting at a target address.
/// * **File system support.** By default, a VMO's memory pages are initially
///   all zeros. But if a VMO is attached to a pager (`Pager`) upon creation,
///   then its memory pages will be populated by the pager.
///   With this pager mechanism, file systems can easily implement page caches
///   with VMOs by attaching the VMOs to pagers backed by inodes.
///   all zeros. But if a VMO is attached to a backend ([`PageCacheBackend`]) upon creation,
///   then its memory pages will be populated by the backend.
///   With this backend mechanism, file systems can easily implement page caches
///   with VMOs by attaching the VMOs to backends backed by inodes.
///
/// # Concurrency Guarantees
///
/// A `Vmo` guarantees the correctness of each [`CachePage`]'s [`PageState`]
/// transitions (e.g., `Uninit` → `UpToDate` → `Dirty`). These transitions are
/// performed atomically under the page lock, ensuring that concurrent readers
/// and writers always observe a consistent page state.
///
/// However, a `Vmo` does **not** guarantee atomicity of the page *contents*
/// with respect to concurrent reads and writes. In particular, when a page is
/// mapped into user-space address space, the kernel cannot prevent data races
/// between concurrent user-space memory accesses and kernel-side I/O operations
/// (e.g., `read`/`write` system calls or page fault handling). Callers that
/// require stronger consistency guarantees must provide their own
/// synchronization (e.g., file locks or application-level mutexes).
///
/// # Examples
///

@@ -56,16 +82,21 @@ pub use pager::Pager;
/// # Implementation
///
/// `Vmo` provides high-level APIs for address space management by wrapping
/// around its low-level counterpart [`ostd::mm::UFrame`].
/// Compared with `UFrame`,
/// around its low-level counterpart [`CachePage`]. Compared with [`CachePage`],
/// `Vmo` is easier to use (by offering more powerful APIs) and
/// harder to misuse (thanks to its nature of being a capability).
///
/// [`PageState`]: crate::fs::utils::PageState
pub struct Vmo {
    pager: Option<Arc<dyn Pager>>,
    /// The backend that provides disk I/O operations, if any.
    ///
    /// Using `Weak` here is to avoid circular references in exfat file systems.
    /// We should avoid the circular reference by design, and then we can change this to `Arc`.
    backend: Option<Weak<dyn PageCacheBackend>>,
    /// Flags
    flags: VmoFlags,
    /// The virtual pages where the VMO resides.
    pages: XArray<UFrame>,
    pages: XArray<CachePage>,
    /// The size of the VMO.
    ///
    /// Note: This size may not necessarily match the size of the `pages`, but it is

@@ -84,6 +115,7 @@ pub struct Vmo {
impl Debug for Vmo {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("Vmo")
            .field("has_backend", &self.backend.is_some())
            .field("flags", &self.flags)
            .field("size", &self.size)
            .field("writable_mapping_status", &self.writable_mapping_status)

@@ -111,11 +143,22 @@ bitflags! {
/// The error type used for commit operations of [`Vmo`].
#[derive(Debug)]
pub enum VmoCommitError {
    /// Represents a general error raised during the commit operation.
    /// A general error occurred during the commit operation.
    Err(Error),
    /// Represents that the commit operation needs to perform an I/O operation on the
    /// wrapped index.
    /// The commit operation requires an I/O operation to read the page
    /// from the backend.
    ///
    /// The wrapped value is the page index.
    NeedIo(usize),
    /// Failed to lock the page because it's currently locked by another thread.
    ///
    /// Contains the page index and the page itself.
    LockPageFailed(usize, CachePage),
    /// The page exists but is not yet initialized.
    ///
    /// The caller should wait for initialization to complete.
    /// Contains the page index and the page.
    WaitUntilInit(usize, CachePage),
}
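
// Illustrative sketch (assumed context: `vmo: &Vmo`, `backend` obtained from
// the VMO's `PageCacheBackend`): how a caller reacts to each variant; this is
// a condensed form of the retry loops further down in this file.
fn react(vmo: &Vmo, backend: &dyn PageCacheBackend, offset: usize) -> Result<()> {
    match vmo.try_commit_page(offset) {
        Ok(_page) => Ok(()),
        Err(VmoCommitError::Err(e)) => Err(e),
        // Perform the blocking commit, then the caller retries the operation.
        Err(VmoCommitError::NeedIo(idx)) => vmo.commit_on(idx).map(|_| ()),
        // Initialize the page from the backend exactly once, then retry.
        Err(VmoCommitError::WaitUntilInit(idx, page)) => {
            page.ensure_init(|locked| backend.read_page(idx, locked))
        }
        // Fall back to a blocking page lock, then retry from the next page.
        Err(VmoCommitError::LockPageFailed(_idx, page)) => {
            let _locked = page.lock();
            Ok(())
        }
    }
}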

impl From<Error> for VmoCommitError {

@@ -130,84 +173,54 @@ impl From<ostd::Error> for VmoCommitError {
    }
}

bitflags! {
    /// Commit Flags.
    pub struct CommitFlags: u8 {
        /// Set this flag if the page will be completely overwritten.
        /// This flag contains the WILL_WRITE flag.
        const WILL_OVERWRITE = 1;
    }
}

impl CommitFlags {
    pub fn will_overwrite(&self) -> bool {
        self.contains(Self::WILL_OVERWRITE)
    }
}

impl Vmo {
    /// Prepares a new `UFrame` for the target index in pages, returns this new frame.
    /// Converts this VMO to a disk-backed VMO wrapper if it has a backend.
    ///
    /// This operation may involve I/O operations if the VMO is backed by a pager.
    fn prepare_page(&self, page_idx: usize, commit_flags: CommitFlags) -> Result<UFrame> {
        match &self.pager {
            None => Ok(FrameAllocOptions::new().alloc_frame()?.into()),
            Some(pager) => {
                if commit_flags.will_overwrite() {
                    pager.commit_overwrite(page_idx)
                } else {
                    pager.commit_page(page_idx)
                }
            }
        }
    /// Returns `None` if this is an anonymous VMO.
    pub fn as_disk_backed(&self) -> Option<DiskBackedVmo<'_>> {
        self.backend.as_ref().and_then(|weak_backend| {
            weak_backend
                .upgrade()
                .map(|backend| DiskBackedVmo { vmo: self, backend })
        })
    }

    /// Commits a page at a specific page index.
    ///
    /// This method may involve I/O operations if the VMO needs to fetch a page from
    /// the underlying page cache.
    pub fn commit_on(&self, page_idx: usize, commit_flags: CommitFlags) -> Result<UFrame> {
        let new_page = self.prepare_page(page_idx, commit_flags)?;
    pub fn commit_on(&self, page_idx: usize) -> Result<CachePage> {
        self.commit_on_internal(page_idx, false)
    }

    fn commit_on_internal(&self, page_idx: usize, will_overwrite: bool) -> Result<CachePage> {
        let mut locked_pages = self.pages.lock();
        if page_idx * PAGE_SIZE > self.size() {
            return_errno_with_message!(Errno::EINVAL, "the offset is outside the VMO");
        }

        let mut cursor = locked_pages.cursor_mut(page_idx as u64);
        if let Some(page) = cursor.load() {
            return Ok(page.clone());
        if let Some(disk_backed) = self.as_disk_backed() {
            disk_backed.commit_on(locked_pages, page_idx, will_overwrite)
        } else {
            let mut cursor = locked_pages.cursor_mut(page_idx as u64);
            if let Some(page) = cursor.load() {
                return Ok(page.clone());
            }

            let new_page = CachePage::alloc_zero()?;
            cursor.store(new_page.clone());

            Ok(new_page)
        }

        cursor.store(new_page.clone());
        Ok(new_page)
    }

    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, UFrame>,
    ) -> core::result::Result<UFrame, VmoCommitError> {
        if let Some(committed_page) = cursor.load() {
            return Ok(committed_page.clone());
        }

        if let Some(pager) = &self.pager {
            // FIXME: Here `Vmo` treat all instructions in `pager` as I/O instructions
            // since it needs to take the inner `Mutex` lock and users also cannot hold a
            // `SpinLock` to do such instructions. This workaround may introduce some performance
            // issues. In the future we should solve the redundancy of `Vmo` and the pagecache
            // make sure return such error when really needing I/Os.
            return Err(VmoCommitError::NeedIo(cursor.index() as usize));
        }

        let frame = self.commit_on(cursor.index() as usize, CommitFlags::empty())?;
        Ok(frame)
    }

    /// Commits the page corresponding to the target offset in the VMO.
    ///
    /// If the commit operation needs to perform I/O, it will return a [`VmoCommitError::NeedIo`].
    pub fn try_commit_page(&self, offset: usize) -> core::result::Result<UFrame, VmoCommitError> {
    pub fn try_commit_page(
        &self,
        offset: usize,
    ) -> core::result::Result<CachePage, VmoCommitError> {
        let page_idx = offset / PAGE_SIZE;
        if offset >= self.size() {
            return Err(VmoCommitError::Err(Error::with_message(

@@ -218,7 +231,30 @@ impl Vmo {

        let guard = disable_preempt();
        let mut cursor = self.pages.cursor(&guard, page_idx as u64);
        self.try_commit_with_cursor(&mut cursor)
        self.try_commit_with_cursor(&mut cursor, false)
            .map(|(_, page)| page)
    }

    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, CachePage>,
        will_overwrite: bool,
    ) -> core::result::Result<(usize, CachePage), VmoCommitError> {
        if let Some(disk_backed) = self.as_disk_backed() {
            if let Some((index, page)) =
                disk_backed.try_commit_with_cursor(cursor, will_overwrite)?
            {
                return Ok((index, page));
            }
        } else if let Some(page) = cursor.load() {
            let index = cursor.index() as usize;
            return Ok((index, page.clone()));
        }

        // Need to commit. Only Anonymous VMOs can reach here, because disk-backed VMOs will return
        // `Err` if the page is not loaded.
        let index = cursor.index() as usize;
        Ok((index, self.commit_on_internal(index, will_overwrite)?))
    }

    /// Traverses the indices within a specified range of a VMO sequentially.

@@ -230,11 +266,25 @@ impl Vmo {
    pub fn try_operate_on_range<F>(
        &self,
        range: &Range<usize>,
        mut operate: F,
        operate: F,
    ) -> core::result::Result<(), VmoCommitError>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>,
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        self.try_operate_on_range_internal(range, operate, false)
    }

    fn try_operate_on_range_internal<F>(
        &self,
        range: &Range<usize>,
        mut operate: F,
        will_overwrite: bool,
    ) -> core::result::Result<(), VmoCommitError>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        if range.end > self.size() {

@@ -248,224 +298,231 @@ impl Vmo {
        let guard = disable_preempt();
        let mut cursor = self.pages.cursor(&guard, page_idx_range.start as u64);
        for page_idx in page_idx_range {
            let mut commit_fn = || self.try_commit_with_cursor(&mut cursor);
            let mut commit_fn = || self.try_commit_with_cursor(&mut cursor, will_overwrite);
            operate(&mut commit_fn)?;
            cursor.next();
        }
        Ok(())
    }

    /// Traverses the indices within a specified range of a VMO sequentially.
    ///
    /// For each index position, you have the option to commit the page as well as
    /// perform other operations.
    ///
    /// This method may involve I/O operations if the VMO needs to fetch a page from
    /// the underlying page cache.
    fn operate_on_range<F>(
        &self,
        mut range: Range<usize>,
        mut operate: F,
        commit_flags: CommitFlags,
    ) -> Result<()>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        'retry: loop {
            let res = self.try_operate_on_range(&range, &mut operate);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on(index, commit_flags)?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
            }
        }
    }

    /// Decommits a range of pages in the VMO.
    ///
    /// The range must be within the size of the VMO.
    ///
    /// The start and end addresses will be rounded down and up to page boundaries.
    pub fn decommit(&self, range: Range<usize>) -> Result<()> {
        let locked_pages = self.pages.lock();
        if range.end > self.size() {
            return_errno_with_message!(Errno::EINVAL, "operated range exceeds the vmo size");
        }

        self.decommit_pages(locked_pages, range)?;
        Ok(())
    }

    /// Reads the specified amount of buffer content starting from the target offset in the VMO.
    pub fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()> {
        let read_len = writer.avail().min(self.size().saturating_sub(offset));
        let read_range = offset..(offset + read_len);
        let mut read_offset = offset % PAGE_SIZE;

        let read =
            move |commit_fn: &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>| {
                let frame = commit_fn()?;
                frame
                    .reader()
                    .skip(read_offset)
                    .read_fallible(writer)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                read_offset = 0;
                Ok(())
            };

        self.operate_on_range(read_range, read, CommitFlags::empty())
    }

    /// Writes the specified amount of buffer content starting from the target offset in the VMO.
    pub fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()> {
        let write_len = reader.remain();
        let write_range = offset..(offset + write_len);
        let mut write_offset = offset % PAGE_SIZE;
        let mut write =
            move |commit_fn: &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>| {
                let frame = commit_fn()?;
                frame
                    .writer()
                    .skip(write_offset)
                    .write_fallible(reader)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                write_offset = 0;
                Ok(())
            };

        if write_range.len() < PAGE_SIZE {
            self.operate_on_range(write_range.clone(), write, CommitFlags::empty())?;
        } else {
            let temp = write_range.start + PAGE_SIZE - 1;
            let up_align_start = temp - temp % PAGE_SIZE;
            let down_align_end = write_range.end - write_range.end % PAGE_SIZE;
            if write_range.start != up_align_start {
                let head_range = write_range.start..up_align_start;
                self.operate_on_range(head_range, &mut write, CommitFlags::empty())?;
            }
            if up_align_start != down_align_end {
                let mid_range = up_align_start..down_align_end;
                self.operate_on_range(mid_range, &mut write, CommitFlags::WILL_OVERWRITE)?;
            }
            if down_align_end != write_range.end {
                let tail_range = down_align_end..write_range.end;
                self.operate_on_range(tail_range, &mut write, CommitFlags::empty())?;
            }
        }

        if let Some(pager) = &self.pager {
            let page_idx_range = get_page_idx_range(&write_range);
            for page_idx in page_idx_range {
                pager.update_page(page_idx)?;
            }
        }
        Ok(())
    }

    /// Clears the target range in current VMO by writing zeros.
    pub fn clear(&self, range: Range<usize>) -> Result<()> {
        let buffer = vec![0u8; range.end - range.start];
        let mut reader = VmReader::from(buffer.as_slice()).to_fallible();
        self.write(range.start, &mut reader)?;
        Ok(())
    }

    /// Returns the size of current VMO.
    pub fn size(&self) -> usize {
        self.size.load(Ordering::Acquire)
    }

    /// Resizes current VMO to target size.
    ///
    /// The VMO must be resizable.
    ///
    /// The new size will be rounded up to page boundaries.
    pub fn resize(&self, new_size: usize) -> Result<()> {
        assert!(self.flags.contains(VmoFlags::RESIZABLE));
        let new_size = new_size.align_up(PAGE_SIZE);

        let locked_pages = self.pages.lock();

        let old_size = self.size();
        if new_size == old_size {
            return Ok(());
        }

        self.size.store(new_size, Ordering::Release);

        if new_size < old_size {
            self.decommit_pages(locked_pages, new_size..old_size)?;
        }

        Ok(())
    }

    fn decommit_pages(
        &self,
        mut locked_pages: LockedXArray<UFrame>,
        range: Range<usize>,
    ) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);
        let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);

        let Some(pager) = &self.pager else {
            cursor.remove();
            while let Some(page_idx) = cursor.next_present()
                && page_idx < page_idx_range.end as u64
            {
                cursor.remove();
            }
            return Ok(());
        };

        let mut removed_page_idx = Vec::new();
        if cursor.remove().is_some() {
            removed_page_idx.push(page_idx_range.start);
        }
        while let Some(page_idx) = cursor.next_present()
            && page_idx < page_idx_range.end as u64
        {
            removed_page_idx.push(page_idx as usize);
            cursor.remove();
        }

        drop(locked_pages);

        for page_idx in removed_page_idx {
            pager.decommit_page(page_idx)?;
        }

        Ok(())
    }

    /// Returns the flags of current VMO.
    pub fn flags(&self) -> VmoFlags {
        self.flags
    }

    /// Replaces the page at the `page_idx` in the VMO with the input `page`.
    fn replace(&self, page: UFrame, page_idx: usize) -> Result<()> {
        let mut locked_pages = self.pages.lock();
        if page_idx >= self.size() / PAGE_SIZE {
            return_errno_with_message!(Errno::EINVAL, "the page index is outside of the vmo");
    /// Returns the status of writable mappings of the VMO.
    pub fn writable_mapping_status(&self) -> &WritableMappingStatus {
        // Currently, only VMOs used by `MemfdInode` (anonymous) track writable mapping status.
        // Disk-backed VMOs do not use this field.
        debug_assert!(!self.is_disk_backed());
        &self.writable_mapping_status
    }

    fn decommit_pages(
        &self,
        mut locked_pages: LockedXArray<CachePage>,
        range: Range<usize>,
    ) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);
        let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);

        loop {
            cursor.remove();
            let page_idx = cursor.next_present();
            if page_idx.is_none_or(|idx| idx >= page_idx_range.end as u64) {
                break;
            }
        }

        locked_pages.store(page_idx as u64, page);
        Ok(())
    }

    /// Returns the status of writable mappings of the VMO.
    pub fn writable_mapping_status(&self) -> &WritableMappingStatus {
        // Only writable file-backed mappings may need to be tracked.
        debug_assert!(self.pager.is_some());
        &self.writable_mapping_status
    /// Returns whether this VMO is disk-backed.
    fn is_disk_backed(&self) -> bool {
        self.backend.is_some()
    }
}

impl Vmo {
    /// Reads the specified amount of buffer content starting from the target offset in the VMO.
    pub fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()> {
        let read_len = writer.avail().min(self.size().saturating_sub(offset));
        let mut read_range = offset..(offset + read_len);
        let mut read_offset = offset % PAGE_SIZE;

        let mut read = move |commit_fn: &mut dyn FnMut() -> core::result::Result<
            (usize, CachePage),
            VmoCommitError,
        >| {
            let (_, page) = commit_fn()?;
            page.reader()
                .skip(read_offset)
                .read_fallible(writer)
                .map_err(|e| VmoCommitError::from(e.0))?;
            read_offset = 0;
            Ok(())
        };

        'retry: loop {
            let res = self.try_operate_on_range(&read_range, &mut read);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on(index)?;
                    read_range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::WaitUntilInit(index, cache_page)) => {
                    cache_page.ensure_init(|locked_page| {
                        self.as_disk_backed()
                            .unwrap()
                            .backend
                            .read_page(index, locked_page)
                    })?;
                    read_range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                _ => unreachable!(),
            }
        }
    }

    /// Writes the specified amount of buffer content starting from the target offset in the VMO.
    pub fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()> {
        let write_len = reader.remain();
        let write_range = offset..(offset + write_len);
        let mut write_offset = offset % PAGE_SIZE;

        if !self.is_disk_backed() {
            // Fast path for anonymous VMOs
            let write = move |commit_fn: &mut dyn FnMut() -> core::result::Result<
                (usize, CachePage),
                VmoCommitError,
            >| {
                let (_, page) = commit_fn()?;
                page.writer()
                    .skip(write_offset)
                    .write_fallible(reader)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                write_offset = 0;
                Ok(())
            };

            return self.write_on_range(
                write_range.clone(),
                write,
                Option::<fn(&LockedCachePage) -> Result<()>>::None,
                false,
            );
        }

        // Slow path for disk-backed VMOs (with dirty tracking)
        let reader = RefCell::new(reader);
        let write_offset = RefCell::new(write_offset);
        let mut write = |commit_fn: &mut dyn FnMut() -> core::result::Result<
            (usize, CachePage),
            VmoCommitError,
        >| {
            let (index, page) = commit_fn()?;
            let locked_page = page
                .try_lock()
                .ok_or_else(|| VmoCommitError::LockPageFailed(index, page))?;
            locked_page.set_dirty();
            locked_page
                .writer()
                .skip(*write_offset.borrow())
                .write_fallible(&mut reader.borrow_mut())
                .map_err(|e| VmoCommitError::from(e.0))?;
            *write_offset.borrow_mut() = 0;
            Ok(())
        };

        let mut fallback_write = |locked_page: &LockedCachePage| {
            locked_page.set_dirty();
            locked_page
                .writer()
                .skip(*write_offset.borrow())
                .write_fallible(&mut reader.borrow_mut())?;
            *write_offset.borrow_mut() = 0;
            Ok(())
        };

        if write_range.len() < PAGE_SIZE {
            self.write_on_range(write_range.clone(), write, Some(fallback_write), false)?;
        } else {
            // Split into head (unaligned), middle (aligned), and tail (unaligned)
            let temp = write_range.start + PAGE_SIZE - 1;
            let up_align_start = temp - temp % PAGE_SIZE;
            let down_align_end = write_range.end - write_range.end % PAGE_SIZE;

            if write_range.start != up_align_start {
                let head_range = write_range.start..up_align_start;
                self.write_on_range(head_range, &mut write, Some(&mut fallback_write), false)?;
            }
            if up_align_start != down_align_end {
                // Middle part is page-aligned and will be completely overwritten
                let mid_range = up_align_start..down_align_end;
                self.write_on_range(mid_range, &mut write, Some(&mut fallback_write), true)?;
            }
            if down_align_end != write_range.end {
                let tail_range = down_align_end..write_range.end;
                self.write_on_range(tail_range, &mut write, Some(&mut fallback_write), false)?;
            }
        }

        Ok(())
    }
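
    // Illustrative split for a disk-backed write (PAGE_SIZE = 4096): writing
    // 10_000 bytes at offset 1_000 covers the range 1_000..11_000 and is issued as
    //
    //     head: 1_000..4_096    unaligned, read-modify-write (will_overwrite = false)
    //     mid:  4_096..8_192    page-aligned, fully overwritten, no read needed
    //     tail: 8_192..11_000   unaligned, read-modify-write (will_overwrite = false)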
|
||||
|
||||
    fn write_on_range<F1, F2>(
        &self,
        mut range: Range<usize>,
        mut operate: F1,
        mut fallback: Option<F2>,
        will_overwrite: bool,
    ) -> Result<()>
    where
        F1: FnMut(
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
        F2: FnMut(&LockedCachePage) -> Result<()>,
    {
        'retry: loop {
            let res = self.try_operate_on_range_internal(&range, &mut operate, will_overwrite);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on_internal(index, will_overwrite)?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::WaitUntilInit(index, cache_page)) => {
                    cache_page.ensure_init(|locked_page| {
                        self.as_disk_backed()
                            .unwrap()
                            .backend
                            .read_page(index, locked_page)
                    })?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::LockPageFailed(index, cache_page)) => {
                    let Some(fallback) = &mut fallback else {
                        unreachable!()
                    };
                    let locked_page = cache_page.lock();
                    fallback(&locked_page)?;
                    range.start = (index + 1) * PAGE_SIZE;
                    continue 'retry;
                }
            }
        }
    }
}
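// The retry loop above is driven by `VmoCommitError`. Judging only from how the variants
// are matched in this hunk, the enum has roughly the following shape (a sketch inferred
// from the call sites, not the authoritative definition):
//
//     enum VmoCommitError {
//         /// A hard error; propagated to the caller as-is.
//         Err(Error),
//         /// The page at this index is absent and must be committed, possibly with I/O.
//         NeedIo(usize),
//         /// The page exists but is uninitialized; it must be read before use.
//         WaitUntilInit(usize, CachePage),
//         /// The page could not be locked without blocking; use the `fallback` path.
//         LockPageFailed(usize, CachePage),
//     }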
@@ -498,6 +555,169 @@ impl VmIoFill for Vmo {
    }
}
/// A wrapper around a disk-backed VMO that provides specialized operations.
///
/// This structure is created by calling [`Vmo::as_disk_backed()`] and provides
/// access to disk-backed-specific functionality, such as reading pages from the
/// backend storage and managing dirty pages.
pub struct DiskBackedVmo<'a> {
    vmo: &'a Vmo,
    backend: Arc<dyn PageCacheBackend>,
}
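// `Vmo::as_disk_backed()` is referenced below but not defined in this hunk. A minimal
// sketch of its assumed shape, inferred from the call sites (the `backend` field on `Vmo`
// is hypothetical): it yields `Some(DiskBackedVmo)` only when the VMO still has a live
// `PageCacheBackend`.
//
//     impl Vmo {
//         pub fn as_disk_backed(&self) -> Option<DiskBackedVmo<'_>> {
//             let backend = self.backend.as_ref()?.upgrade()?;
//             Some(DiskBackedVmo { vmo: self, backend })
//         }
//     }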
impl<'a> DiskBackedVmo<'a> {
    /// Commits a page at the given index for a disk-backed VMO.
    fn commit_on(
        &self,
        mut locked_pages: LockedXArray<'_, CachePage>,
        page_idx: usize,
        will_overwrite: bool,
    ) -> Result<CachePage> {
        let mut cursor = locked_pages.cursor_mut(page_idx as u64);
        if let Some(page) = cursor.load() {
            let page = page.clone();
            if self.backend.npages() > page_idx {
                drop(locked_pages);
                if !will_overwrite {
                    page.ensure_init(|locked_page| self.backend.read_page(page_idx, locked_page))?;
                }
            }

            return Ok(page);
        };

        // The page is not cached yet. If it is within the file bounds, it must be
        // read from the backend (unless it is about to be completely overwritten).
        if self.backend.npages() > page_idx {
            let new_page = CachePage::alloc_uninit()?;
            // Acquiring the lock on a freshly allocated page must succeed.
            let locked_page = new_page.try_lock().unwrap();

            cursor.store(locked_page.clone());

            drop(locked_pages);

            if will_overwrite {
                // The page will be completely overwritten, so there is no need to read it.
                Ok(locked_page.unlock())
            } else {
                // Read the page from the backend storage.
                self.backend.read_page(page_idx, locked_page)?;
                Ok(new_page)
            }
        } else {
            // The page is beyond the file bounds; treat it as a hole (zero-filled).
            let new_page = CachePage::alloc_zero()?;
            cursor.store(new_page.clone());

            Ok(new_page)
        }
    }
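    // In short, `commit_on` has three outcomes: an already cached page is returned directly
    // (initializing it first if it may be read); a missing page inside the file bounds is
    // allocated and filled from the backend, unless `will_overwrite` makes the read
    // unnecessary; and a missing page beyond the file bounds is a hole, so a zero-filled
    // page is stored without any I/O.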
    /// Attempts to commit a page using a cursor, without blocking on I/O.
    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, CachePage>,
        will_overwrite: bool,
    ) -> core::result::Result<Option<(usize, CachePage)>, VmoCommitError> {
        let page_idx = cursor.index() as usize;

        let Some(page) = cursor.load() else {
            return Err(VmoCommitError::NeedIo(page_idx));
        };

        // If the page is within the file bounds, check whether it has been initialized.
        if !will_overwrite && self.backend.npages() > page_idx && page.is_uninit() {
            return Err(VmoCommitError::WaitUntilInit(page_idx, page.clone()));
        }

        Ok(Some((page_idx, page.clone())))
    }
    /// Collects dirty pages in the specified byte range.
    ///
    /// If `remove` is `true`, the pages will be removed from the XArray while
    /// being collected. Otherwise, the pages are only read.
    fn collect_dirty_pages(
        &self,
        range: &Range<usize>,
        remove: bool,
    ) -> Result<Vec<(usize, CachePage)>> {
        let mut locked_pages = self.vmo.pages.lock();
        if range.start > self.size() {
            return Ok(Vec::new());
        }

        let page_idx_range = get_page_idx_range(range);
        let npages = self.backend.npages();
        if page_idx_range.start >= npages {
            return Ok(Vec::new());
        }

        let mut dirty_pages = Vec::new();

        if remove {
            let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);
            if let Some(page) = cursor.remove()
                && page.is_dirty()
            {
                dirty_pages.push((page_idx_range.start, page.clone()));
            }

            while let Some(page_idx) = cursor.next_present() {
                let page_idx = page_idx as usize;
                if page_idx >= page_idx_range.end || page_idx >= npages {
                    break;
                }

                let page = cursor.remove().unwrap();
                if page.is_dirty() {
                    dirty_pages.push((page_idx, page.clone()));
                }
            }
        } else {
            let mut cursor = locked_pages.cursor(page_idx_range.start as u64);
            if let Some(page) = cursor.load()
                && page.is_dirty()
            {
                dirty_pages.push((page_idx_range.start, page.clone()));
            }

            while let Some(page_idx) = cursor.next_present() {
                let page_idx = page_idx as usize;
                if page_idx >= page_idx_range.end || page_idx >= npages {
                    break;
                }

                let page = cursor.load().unwrap();
                if page.is_dirty() {
                    dirty_pages.push((page_idx, page.clone()));
                }
            }
        }

        Ok(dirty_pages)
    }
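    // Note: later in this commit, `flush_range` calls `collect_dirty_pages` with
    // `remove == false` (write back but keep the pages cached), while `discard_range`
    // passes `remove == true` (write back and drop the pages from the cache).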
    /// Writes back a collection of dirty pages to the backend storage.
    fn write_back_pages(&self, dirty_pages: Vec<(usize, CachePage)>) -> Result<()> {
        for (page_idx, page) in dirty_pages {
            let locked_page = page.lock();
            if locked_page.is_dirty() {
                self.backend.write_page(page_idx, locked_page)?;
            }
        }
        Ok(())
    }
}
impl Deref for DiskBackedVmo<'_> {
    type Target = Vmo;

    fn deref(&self) -> &Self::Target {
        self.vmo
    }
}
/// Gets the page index range that contains the given offset range of the VMO.
pub fn get_page_idx_range(vmo_offset_range: &Range<usize>) -> Range<usize> {
    let start = vmo_offset_range.start.align_down(PAGE_SIZE);
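// Worked example (assuming PAGE_SIZE == 4096, for illustration only): per the doc comment
// above, a byte range of 100..8292 aligns down to 0 and up to 12288, giving the page index
// range 0..3, i.e., pages 0, 1, and 2 overlap the byte range.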
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: MPL-2.0

use core::{ops::Range, sync::atomic::Ordering};

use align_ext::AlignExt;
use ostd::mm::VmIoFill;

use crate::{
    fs::utils::{PageCacheBackend, PageCacheOps},
    prelude::*,
    vm::vmo::{Vmo, VmoFlags, VmoOptions},
};

impl PageCacheOps for Vmo {
    fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Arc<Self>> {
        VmoOptions::new(capacity)
            .flags(VmoFlags::RESIZABLE)
            .backend(backend)
            .alloc()
    }
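    // A sketch of how a disk-backed file system might create its page cache with the method
    // above (`inode_size` and `weak_backend` are hypothetical placeholders; `weak_backend`
    // stands for a `Weak<dyn PageCacheBackend>` pointing at the inode):
    //
    //     let page_cache = <Vmo as PageCacheOps>::with_capacity(inode_size, weak_backend)?;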
    // TODO: This method also needs to unmap the decommitted pages from the page tables.
    fn resize(&self, new_size: usize, old_size: usize) -> Result<()> {
        assert!(self.flags.contains(VmoFlags::RESIZABLE));

        if new_size < old_size && !new_size.is_multiple_of(PAGE_SIZE) {
            let fill_zero_end = old_size.min(new_size.align_up(PAGE_SIZE));
            PageCacheOps::fill_zeros(self, new_size..fill_zero_end)?;
        }

        let new_size = new_size.align_up(PAGE_SIZE);

        let locked_pages = self.pages.lock();

        let old_size = self.size();
        if new_size == old_size {
            return Ok(());
        }

        self.size.store(new_size, Ordering::Release);

        if new_size < old_size {
            self.decommit_pages(locked_pages, new_size..old_size)?;
        }

        Ok(())
    }
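    // Worked example of shrinking (assuming PAGE_SIZE == 4096, for illustration only):
    // resizing from old_size == 10000 to new_size == 5000 first zeroes bytes 5000..8192
    // (the tail of the partially kept page), then rounds the new size up to 8192 and
    // decommits the cached pages beyond that page-aligned size.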
    fn flush_range(&self, range: Range<usize>) -> Result<()> {
        let Some(vmo) = self.as_disk_backed() else {
            return Ok(());
        };

        let dirty_pages = vmo.collect_dirty_pages(&range, false)?;
        vmo.write_back_pages(dirty_pages)
    }

    // TODO: This method also needs to unmap the discarded pages from the page tables.
    fn discard_range(&self, range: Range<usize>) -> Result<()> {
        let Some(vmo) = self.as_disk_backed() else {
            return Ok(());
        };

        let dirty_pages = vmo.collect_dirty_pages(&range, true)?;
        vmo.write_back_pages(dirty_pages)
    }
    fn fill_zeros(&self, range: Range<usize>) -> Result<()> {
        VmIoFill::fill_zeros(self, range.start, range.end - range.start)?;
        Ok(())
    }
}
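// A rough usage sketch of the trait methods above, as a file system might call them on an
// `Arc<Vmo>` page cache (the surrounding variable names are hypothetical):
//
//     // Truncate: adjust the cache to the new file size.
//     page_cache.resize(new_size, old_size)?;
//     // Sync: persist dirty pages in a range while keeping them cached.
//     page_cache.flush_range(0..new_size)?;
//     // Eviction: write back dirty pages in a range and drop them from the cache.
//     page_cache.discard_range(0..new_size)?;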