Refactor page cache implementation
parent b0407dd517
commit 5de0d191c1

@@ -1,505 +1,133 @@
// SPDX-License-Identifier: MPL-2.0

#![expect(dead_code)]

use core::{
    ops::Range,
    sync::atomic::{AtomicU8, Ordering},
    ops::{Deref, Range},
    sync::atomic::{AtomicBool, AtomicU8, Ordering},
};

use align_ext::AlignExt;
use aster_block::bio::{BioStatus, BioWaiter};
use lru::LruCache;
use ostd::{
    impl_untyped_frame_meta_for,
    mm::{Frame, FrameAllocOptions, UFrame, VmIoFill},
    mm::{Frame, FrameAllocOptions, HasPaddr},
    sync::WaitQueue,
};

use crate::{
    prelude::*,
    vm::vmo::{Pager, Vmo, VmoFlags, VmoOptions, get_page_idx_range},
};
use crate::{prelude::*, vm::vmo::Vmo};

pub struct PageCache {
    pages: Arc<Vmo>,
    manager: Arc<PageCacheManager>,
}
/// The page cache type.
///
/// The page cache is implemented using a [`Vmo`]. Typically, a page cache for
/// a disk-based file system (e.g., ext2, exfat) is a **disk-backed VMO**, which
/// is associated with a [`PageCacheBackend`] that provides I/O operations to read
/// from and write to the underlying block device. In contrast, for purely in-memory
/// file systems (e.g., ramfs), the page cache is an **anonymous VMO** — it has no
/// backend and its pages exist only in RAM.
pub type PageCache = Arc<Vmo>;
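// Illustrative sketch (not part of the diff): obtaining a disk-backed page
// cache after this refactor. `Inode` is a hypothetical backend type that is
// assumed to implement `PageCacheBackend`, and `PageCacheOps` (declared below)
// is assumed to be implemented for the `Vmo` behind this alias.
fn page_cache_for(inode: &Arc<Inode>, size: usize) -> Result<PageCache> {
    // The weak reference coerces to `Weak<dyn PageCacheBackend>` at the call
    // site; a purely in-memory file system would instead use an anonymous `Vmo`.
    Vmo::with_capacity(size, Arc::downgrade(inode))
}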

impl PageCache {
    /// Creates an empty page cache associated with a new backend.
    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Result<Self> {
        let manager = Arc::new(PageCacheManager::new(backend));
        let pages = VmoOptions::new(0)
            .flags(VmoFlags::RESIZABLE)
            .pager(manager.clone())
            .alloc()?;
        Ok(Self { pages, manager })
    }
/// A trait for page cache operations.
///
/// The page cache serves as an in-memory buffer between the file system and
/// block devices, caching frequently accessed file data to improve performance.
pub trait PageCacheOps {
    /// Creates a new page cache with the specified capacity.
    fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Arc<Self>>;

    /// Creates a page cache associated with an existing backend.
    /// Resizes the page cache to the target size.
    ///
    /// The `capacity` is the initial cache size required by the backend.
    /// This size usually corresponds to the size of the backend.
    pub fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Self> {
        let manager = Arc::new(PageCacheManager::new(backend));
        let pages = VmoOptions::new(capacity)
            .flags(VmoFlags::RESIZABLE)
            .pager(manager.clone())
            .alloc()?;
        Ok(Self { pages, manager })
    }

    /// Returns the Vmo object.
    pub fn pages(&self) -> &Arc<Vmo> {
        &self.pages
    }

    /// Evicts the data within a specified range from the page cache and persists
    /// it to the backend.
    pub fn evict_range(&self, range: Range<usize>) -> Result<()> {
        self.manager.evict_range(range)
    }

    /// Evicts the data within a specified range from the page cache without persisting
    /// it to the backend.
    pub fn discard_range(&self, range: Range<usize>) {
        self.manager.discard_range(range)
    }

    /// Returns the backend.
    pub fn backend(&self) -> Arc<dyn PageCacheBackend> {
        self.manager.backend()
    }

    /// Resizes the current page cache to a target size.
    pub fn resize(&self, new_size: usize) -> Result<()> {
        // If the new size is smaller and not page-aligned,
        // first zero the gap between the new size and the
        // next page boundary (or the old size), if such a gap exists.
        let old_size = self.pages.size();
        if old_size > new_size && !new_size.is_multiple_of(PAGE_SIZE) {
            let gap_size = old_size.min(new_size.align_up(PAGE_SIZE)) - new_size;
            if gap_size > 0 {
                self.fill_zeros(new_size..new_size + gap_size)?;
            }
        }
        self.pages.resize(new_size)
    }
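
    // Worked example of the gap computation above (illustrative, PAGE_SIZE = 4096):
    // shrinking from old_size = 10_000 to new_size = 4_100 leaves a partially
    // truncated page whose tail must be zeroed so that a later grow does not
    // expose stale bytes:
    //
    //     gap_size = min(10_000, align_up(4_100, 4096)) - 4_100
    //              = min(10_000, 8_192) - 4_100
    //              = 4_092            // bytes 4_100..8_192 are zero-filled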

    /// Fills the specified range of the page cache with zeros.
    pub fn fill_zeros(&self, range: Range<usize>) -> Result<()> {
        if range.is_empty() {
            return Ok(());
        }
        let (start, end) = (range.start, range.end);

        // Write zeros to the first partial page if any
        let first_page_end = start.align_up(PAGE_SIZE);
        if first_page_end > start {
            let zero_len = first_page_end.min(end) - start;
            self.pages().fill_zeros(start, zero_len)?;
        }

        // Write zeros to the last partial page if any
        let last_page_start = end.align_down(PAGE_SIZE);
        if last_page_start < end && last_page_start >= start {
            let zero_len = end - last_page_start;
            self.pages().fill_zeros(last_page_start, zero_len)?;
        }

        for offset in (first_page_end..last_page_start).step_by(PAGE_SIZE) {
            self.pages().fill_zeros(offset, PAGE_SIZE)?;
        }
        Ok(())
    }
}
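
// Illustrative example of the split performed by `fill_zeros` above
// (PAGE_SIZE = 4096): zeroing the range 1_000..9_000 is handled as three pieces:
//
//     head:  1_000..4_096   partial first page, zeroed in place
//     tail:  8_192..9_000   partial last page, zeroed in place
//     full:  4_096..8_192   whole pages, zeroed one page at a time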

impl Drop for PageCache {
    fn drop(&mut self) {
        // TODO:
        // The default destruction procedure exhibits slow performance.
        // In contrast, resizing the `VMO` to zero greatly accelerates the process.
        // We need to find out the underlying cause of this discrepancy.
        let _ = self.pages.resize(0);
    }
}

impl Debug for PageCache {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("PageCache")
            .field("size", &self.pages.size())
            .field("manager", &self.manager)
            .finish()
    }
}

struct ReadaheadWindow {
    /// The window.
    window: Range<usize>,
    /// Look ahead position in the current window, where the readahead is triggered.
    /// TODO: We set the `lookahead_index` to the start of the window for now.
    /// This should be adjustable by the user.
    lookahead_index: usize,
}

impl ReadaheadWindow {
    pub fn new(window: Range<usize>) -> Self {
        let lookahead_index = window.start;
        Self {
            window,
            lookahead_index,
        }
    }

    /// Gets the next readahead window.
    /// Most of the time, we push the window forward and double its size.
    /// The `new_size` will be rounded up to page boundaries. If the new size is smaller
    /// than the current size, pages that fall entirely within the truncated range will be
    /// decommitted (freed). For the page that is only partially truncated (i.e., the page
    /// containing the new boundary), the truncated portion will be filled with zeros instead.
    ///
    /// The `max_size` is the maximum size of the window.
    /// The `max_page` is the total page number of the file, and the window should not
    /// exceed the scope of the file.
    pub fn next(&self, max_size: usize, max_page: usize) -> Self {
        let new_start = self.window.end;
        let cur_size = self.window.end - self.window.start;
        let new_size = (cur_size * 2).min(max_size).min(max_page - new_start);
        Self {
            window: new_start..(new_start + new_size),
            lookahead_index: new_start,
        }
    }
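
    // Illustrative progression of the readahead window (INIT_WINDOW_SIZE = 4,
    // DEFAULT_MAX_SIZE = 32, max_page = 100): a first miss at page 5 creates the
    // window 6..10; each subsequent `next()` pushes the window forward and
    // doubles it, clamped by `max_size` and the end of the file:
    //
    //     6..10  ->  10..18  ->  18..34  ->  34..66  ->  66..98  ->  98..100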
    /// The `old_size` represents the actual used range of the page cache (i.e., the logical
    /// size of the cached content), which may differ from the total capacity of the page cache.
    /// It is used to determine the boundary of the previously valid data so that only the
    /// discarded logical range (from `new_size` to `old_size`) within a partially truncated
    /// page needs to be zero-filled.
    fn resize(&self, new_size: usize, old_size: usize) -> Result<()>;

    pub fn lookahead_index(&self) -> usize {
        self.lookahead_index
    }
    /// Flushes the dirty pages in the specified range to the backend storage.
    ///
    /// This operation ensures that any modifications made to the pages within the given
    /// range are persisted to the underlying storage device or file system.
    ///
    /// If the given range exceeds the current size of the page cache, only the pages within
    /// the valid range will be flushed.
    fn flush_range(&self, range: Range<usize>) -> Result<()>;

    pub fn readahead_index(&self) -> usize {
        self.window.end
    }
    /// Discards the pages within the specified range from the page cache.
    ///
    /// This operation will first **flush** the dirty pages in the range to the backend storage,
    /// ensuring that any modifications are persisted. After flushing, the pages are removed
    /// from the page cache. This is useful for invalidating cached data that is no longer needed
    /// or has become stale.
    fn discard_range(&self, range: Range<usize>) -> Result<()>;

    pub fn readahead_range(&self) -> Range<usize> {
        self.window.clone()
    }
}

struct ReadaheadState {
    /// Current readahead window.
    ra_window: Option<ReadaheadWindow>,
    /// Maximum window size.
    max_size: usize,
    /// The last page visited, used to determine sequential I/O.
    prev_page: Option<usize>,
    /// Readahead requests waiter.
    waiter: BioWaiter,
}

impl ReadaheadState {
    const INIT_WINDOW_SIZE: usize = 4;
    const DEFAULT_MAX_SIZE: usize = 32;

    pub fn new() -> Self {
        Self {
            ra_window: None,
            max_size: Self::DEFAULT_MAX_SIZE,
            prev_page: None,
            waiter: BioWaiter::new(),
        }
    }

    /// Sets the maximum readahead window size.
    pub fn set_max_window_size(&mut self, size: usize) {
        self.max_size = size;
    }

    fn is_sequential(&self, idx: usize) -> bool {
        if let Some(prev) = self.prev_page {
            idx == prev || idx == prev + 1
        } else {
            false
        }
    }

    /// The number of bio requests in waiter.
    /// This number will be zero if there is no previous readahead.
    pub fn request_number(&self) -> usize {
        self.waiter.nreqs()
    }

    /// Checks for the previous readahead.
    /// Returns true if the previous readahead has been completed.
    pub fn prev_readahead_is_completed(&self) -> bool {
        let nreqs = self.request_number();
        if nreqs == 0 {
            return false;
        }

        for i in 0..nreqs {
            if self.waiter.status(i) == BioStatus::Submit {
                return false;
            }
        }
        true
    }

    /// Waits for the previous readahead.
    pub fn wait_for_prev_readahead(
        &mut self,
        pages: &mut MutexGuard<LruCache<usize, CachePage>>,
    ) -> Result<()> {
        if matches!(self.waiter.wait(), Some(BioStatus::Complete)) {
            let Some(window) = &self.ra_window else {
                return_errno!(Errno::EINVAL)
            };
            for idx in window.readahead_range() {
                if let Some(page) = pages.get_mut(&idx) {
                    page.store_state(PageState::UpToDate);
                }
            }
            self.waiter.clear();
        } else {
            return_errno!(Errno::EIO)
        }

        Ok(())
    }

    /// Determines whether a new readahead should be performed.
    /// We only consider readahead for sequential I/O now.
    /// There should be at most one in-progress readahead.
    pub fn should_readahead(&self, idx: usize, max_page: usize) -> bool {
        if self.request_number() == 0 && self.is_sequential(idx) {
            if let Some(cur_window) = &self.ra_window {
                let trigger_readahead =
                    idx == cur_window.lookahead_index() || idx == cur_window.readahead_index();
                let next_window_exist = cur_window.readahead_range().end < max_page;
                trigger_readahead && next_window_exist
            } else {
                let new_window_start = idx + 1;
                new_window_start < max_page
            }
        } else {
            false
        }
    }
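
    // Illustrative trigger (with `lookahead_index` currently fixed to the window
    // start): given the window 10..18, a sequential access to page 10 (the
    // lookahead index) or page 18 (the readahead index) starts the next readahead,
    // provided no readahead is already in flight and pages beyond the window exist.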

    /// Sets up the new readahead window.
    pub fn setup_window(&mut self, idx: usize, max_page: usize) {
        let new_window = if let Some(cur_window) = &self.ra_window {
            cur_window.next(self.max_size, max_page)
        } else {
            let start_idx = idx + 1;
            let init_size = Self::INIT_WINDOW_SIZE.min(self.max_size);
            let end_idx = (start_idx + init_size).min(max_page);
            ReadaheadWindow::new(start_idx..end_idx)
        };
        self.ra_window = Some(new_window);
    }

    /// Conducts the new readahead.
    /// Sends the relevant read request and sets the relevant page in the page cache to `Uninit`.
    pub fn conduct_readahead(
        &mut self,
        pages: &mut MutexGuard<LruCache<usize, CachePage>>,
        backend: Arc<dyn PageCacheBackend>,
    ) -> Result<()> {
        let Some(window) = &self.ra_window else {
            return_errno!(Errno::EINVAL)
        };
        for async_idx in window.readahead_range() {
            let mut async_page = CachePage::alloc_uninit()?;
            let pg_waiter = backend.read_page_async(async_idx, &async_page)?;
            if pg_waiter.nreqs() > 0 {
                self.waiter.concat(pg_waiter);
            } else {
                // Some backends (e.g. RamFs) do not issue requests, but fill the page directly.
                async_page.store_state(PageState::UpToDate);
            }
            pages.put(async_idx, async_page);
        }
        Ok(())
    }

    /// Sets the last page visited.
    pub fn set_prev_page(&mut self, idx: usize) {
        self.prev_page = Some(idx);
    }
}

struct PageCacheManager {
    pages: Mutex<LruCache<usize, CachePage>>,
    backend: Weak<dyn PageCacheBackend>,
    ra_state: Mutex<ReadaheadState>,
}

impl PageCacheManager {
    pub fn new(backend: Weak<dyn PageCacheBackend>) -> Self {
        Self {
            pages: Mutex::new(LruCache::unbounded()),
            backend,
            ra_state: Mutex::new(ReadaheadState::new()),
        }
    }

    pub fn backend(&self) -> Arc<dyn PageCacheBackend> {
        self.backend.upgrade().unwrap()
    }

    // Discard pages without writing them back to disk.
    pub fn discard_range(&self, range: Range<usize>) {
        let page_idx_range = get_page_idx_range(&range);
        let mut pages = self.pages.lock();
        for idx in page_idx_range {
            pages.pop(&idx);
        }
    }

    pub fn evict_range(&self, range: Range<usize>) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);

        let mut bio_waiter = BioWaiter::new();
        let mut pages = self.pages.lock();
        let backend = self.backend();
        let backend_npages = backend.npages();
        for idx in page_idx_range.start..page_idx_range.end {
            if let Some(page) = pages.peek(&idx)
                && page.load_state() == PageState::Dirty
                && idx < backend_npages
            {
                let waiter = backend.write_page_async(idx, page)?;
                bio_waiter.concat(waiter);
            }
        }

        if !matches!(bio_waiter.wait(), Some(BioStatus::Complete)) {
            // Do not allow partial failure
            return_errno!(Errno::EIO);
        }

        for (_, page) in pages
            .iter_mut()
            .filter(|(idx, _)| page_idx_range.contains(*idx))
        {
            page.store_state(PageState::UpToDate);
        }
        Ok(())
    }

    fn ondemand_readahead(&self, idx: usize) -> Result<UFrame> {
        let mut pages = self.pages.lock();
        let mut ra_state = self.ra_state.lock();
        let backend = self.backend();
        // Checks for the previous readahead.
        if ra_state.prev_readahead_is_completed() {
            ra_state.wait_for_prev_readahead(&mut pages)?;
        }
        // There are three possible conditions that could be encountered upon reaching here.
        // 1. The requested page is ready for read in page cache.
        // 2. The requested page is in previous readahead range, not ready for now.
        // 3. The requested page is on disk, need a sync read operation here.
        let frame = if let Some(page) = pages.get(&idx) {
            // Cond 1 & 2.
            if let PageState::Uninit = page.load_state() {
                // Cond 2: We should wait for the previous readahead.
                // If there is no previous readahead, an error must have occurred somewhere.
                assert!(ra_state.request_number() != 0);
                ra_state.wait_for_prev_readahead(&mut pages)?;
                pages.get(&idx).unwrap().clone()
            } else {
                // Cond 1.
                page.clone()
            }
        } else {
            // Cond 3.
            // Conducts the sync read operation.
            let page = if idx < backend.npages() {
                let mut page = CachePage::alloc_uninit()?;
                backend.read_page(idx, &page)?;
                page.store_state(PageState::UpToDate);
                page
            } else {
                CachePage::alloc_zero(PageState::Uninit)?
            };
            let frame = page.clone();
            pages.put(idx, page);
            frame
        };
        if ra_state.should_readahead(idx, backend.npages()) {
            ra_state.setup_window(idx, backend.npages());
            ra_state.conduct_readahead(&mut pages, backend)?;
        }
        ra_state.set_prev_page(idx);
        Ok(frame.into())
    }
}

impl Debug for PageCacheManager {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("PageCacheManager")
            .field("pages", &self.pages.lock())
            .finish()
    }
}

impl Pager for PageCacheManager {
    fn commit_page(&self, idx: usize) -> Result<UFrame> {
        self.ondemand_readahead(idx)
    }

    fn update_page(&self, idx: usize) -> Result<()> {
        let mut pages = self.pages.lock();
        if let Some(page) = pages.get_mut(&idx) {
            page.store_state(PageState::Dirty);
        } else {
            warn!("The page {} is not in page cache", idx);
        }

        Ok(())
    }

    fn decommit_page(&self, idx: usize) -> Result<()> {
        let page_result = self.pages.lock().pop(&idx);
        if let Some(page) = page_result
            && let PageState::Dirty = page.load_state()
        {
            let Some(backend) = self.backend.upgrade() else {
                return Ok(());
            };
            if idx < backend.npages() {
                backend.write_page(idx, &page)?;
            }
        }

        Ok(())
    }

    fn commit_overwrite(&self, idx: usize) -> Result<UFrame> {
        if let Some(page) = self.pages.lock().get(&idx) {
            return Ok(page.clone().into());
        }

        let page = CachePage::alloc_uninit()?;
        Ok(self.pages.lock().get_or_insert(idx, || page).clone().into())
    }
    /// Fills the specified range of the page cache with zeros.
    fn fill_zeros(&self, range: Range<usize>) -> Result<()>;
}

/// A page in the page cache.
pub type CachePage = Frame<CachePageMeta>;

const PAGE_WAIT_QUEUE_MASK: usize = 0xff;
const PAGE_WAIT_QUEUE_NUM: usize = PAGE_WAIT_QUEUE_MASK + 1;

/// Global array of wait queues for page cache operations.
///
/// Each wait queue in this array handles wait/wake operations for a subset of cache pages.
/// The queue for a specific page is selected using: `PAGE_WAIT_QUEUES[page.paddr() & PAGE_WAIT_QUEUE_MASK]`.
///
/// This approach avoids the overhead of per-page wait queues while still providing
/// reasonable concurrency through hashing.
static PAGE_WAIT_QUEUES: [WaitQueue; PAGE_WAIT_QUEUE_NUM] =
    [const { WaitQueue::new() }; PAGE_WAIT_QUEUE_NUM];

/// Metadata for a page in the page cache.
#[derive(Debug)]
pub struct CachePageMeta {
    pub state: AtomicPageState,
    /// The current state of the page (uninit, up-to-date, or dirty).
    state: AtomicPageState,
    /// This bit acts as a mutex for the corresponding page.
    ///
    /// When this bit is set, the holder has the exclusive right to perform critical
    /// state transitions (e.g., preparing for I/O).
    lock: AtomicBool,
    // TODO: Add a reverse mapping from the page to VMO for eviction.
}

impl Default for CachePageMeta {
    fn default() -> Self {
        Self {
            state: AtomicPageState::new(PageState::Uninit),
            lock: AtomicBool::new(false),
        }
    }
}

impl_untyped_frame_meta_for!(CachePageMeta);

pub trait CachePageExt {
pub trait CachePageExt: Sized {
    /// Gets the metadata associated with the cache page.
    fn metadata(&self) -> &CachePageMeta;

    /// Gets the wait queue associated with the cache page.
    fn wait_queue(&self) -> &'static WaitQueue;

    /// Tries to lock the cache page.
    fn try_lock(&self) -> Option<LockedCachePage>;

    /// Locks the cache page, blocking until the lock is acquired.
    fn lock(self) -> LockedCachePage;

    /// Ensures the page is initialized, calling `init_fn` if necessary.
    fn ensure_init(&self, init_fn: impl Fn(LockedCachePage) -> Result<()>) -> Result<()>;

    /// Allocates a new cache page whose content and state are uninitialized.
    fn alloc_uninit() -> Result<CachePage> {
        let meta = CachePageMeta {
            state: AtomicPageState::new(PageState::Uninit),
            lock: AtomicBool::new(false),
        };
        let page = FrameAllocOptions::new()
            .zeroed(false)

@@ -508,9 +136,10 @@ pub trait CachePageExt {
    }

    /// Allocates a new zeroed cache page with the wanted state.
    fn alloc_zero(state: PageState) -> Result<CachePage> {
    fn alloc_zero() -> Result<CachePage> {
        let meta = CachePageMeta {
            state: AtomicPageState::new(state),
            state: AtomicPageState::new(PageState::UpToDate),
            lock: AtomicBool::new(false),
        };
        let page = FrameAllocOptions::new()
            .zeroed(true)

@@ -518,14 +147,19 @@ pub trait CachePageExt {
        Ok(page)
    }

    /// Loads the current state of the cache page.
    fn load_state(&self) -> PageState {
        self.metadata().state.load(Ordering::Relaxed)
    fn is_uninit(&self) -> bool {
        matches!(
            self.metadata().state.load(Ordering::Acquire),
            PageState::Uninit
        )
    }

    /// Stores a new state for the cache page.
    fn store_state(&mut self, new_state: PageState) {
        self.metadata().state.store(new_state, Ordering::Relaxed);
    /// Checks if the page is dirty.
    fn is_dirty(&self) -> bool {
        matches!(
            self.metadata().state.load(Ordering::Acquire),
            PageState::Dirty
        )
    }
}

@@ -533,8 +167,124 @@ impl CachePageExt for CachePage {
    fn metadata(&self) -> &CachePageMeta {
        self.meta()
    }

    fn wait_queue(&self) -> &'static WaitQueue {
        &PAGE_WAIT_QUEUES[self.paddr() & PAGE_WAIT_QUEUE_MASK]
    }

    fn try_lock(&self) -> Option<LockedCachePage> {
        let wait_queue = self.wait_queue();
        self.metadata()
            .lock
            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
            .is_ok()
            .then(|| LockedCachePage::new(self.clone(), wait_queue))
    }

    fn lock(self) -> LockedCachePage {
        let wait_queue = self.wait_queue();
        self.wait_queue().wait_until(|| {
            self.metadata()
                .lock
                .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
                .ok()
        });
        LockedCachePage::new(self, wait_queue)
    }

    fn ensure_init(&self, init_fn: impl Fn(LockedCachePage) -> Result<()>) -> Result<()> {
        // Fast path: if the page is already initialized, return immediately without waiting.
        if !self.is_uninit() {
            return Ok(());
        }

        let lock_page = self.clone().lock();
        // Check again after acquiring the lock to avoid duplicate initialization.
        if !lock_page.is_uninit() {
            return Ok(());
        }

        init_fn(lock_page)
    }
}
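
// Illustrative sketch (assumed context): filling an uninitialized cache page
// from its backend exactly once, even when several threads race on the same
// page. Losers of the race block in `lock()` and then observe `!is_uninit()`.
fn init_from_backend(
    page: &CachePage,
    idx: usize,
    backend: &dyn PageCacheBackend,
) -> Result<()> {
    page.ensure_init(|locked_page| backend.read_page(idx, locked_page))
}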

/// A locked cache page.
///
/// The locked page has the exclusive right to perform critical
/// state transitions (e.g., preparing for I/O).
pub struct LockedCachePage {
    page: Option<CachePage>,
    wait_queue: &'static WaitQueue,
}

impl Debug for LockedCachePage {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        f.debug_struct("LockedCachePage")
            .field("page", &self.page)
            .finish()
    }
}

impl LockedCachePage {
    fn new(page: CachePage, wait_queue: &'static WaitQueue) -> Self {
        Self {
            page: Some(page),
            wait_queue,
        }
    }

    /// Unlocks the page and returns the underlying cache page.
    pub fn unlock(mut self) -> CachePage {
        let page = self.page.take().expect("page already taken");
        page.metadata().lock.store(false, Ordering::Release);
        self.wait_queue.wake_all();
        page
    }

    fn page(&self) -> &CachePage {
        self.page.as_ref().expect("page already taken")
    }

    /// Marks the page as up-to-date.
    ///
    /// This indicates that the page's contents are synchronized with disk
    /// and can be safely read.
    pub fn set_up_to_date(&self) {
        self.page()
            .metadata()
            .state
            .store(PageState::UpToDate, Ordering::Relaxed);
    }

    /// Marks the page as dirty.
    ///
    /// This indicates that the page has been modified and needs to be
    /// written back to disk eventually.
    pub fn set_dirty(&self) {
        self.metadata()
            .state
            .store(PageState::Dirty, Ordering::Relaxed);
    }
}

impl Deref for LockedCachePage {
    type Target = CachePage;

    fn deref(&self) -> &Self::Target {
        self.page.as_ref().expect("page already taken")
    }
}

impl Drop for LockedCachePage {
    fn drop(&mut self) {
        if let Some(page) = &self.page {
            page.metadata().lock.store(false, Ordering::Release);
            self.wait_queue.wake_all();
        }
    }
}
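
// Illustrative sketch of the lock protocol: the bit is taken with `lock()`
// (or `try_lock()`), the critical state transition is performed, and the bit
// is released either explicitly via `unlock()` or implicitly by `Drop` on the
// error path, waking any waiters parked on the shared wait queue.
fn mark_dirty_exclusively(page: CachePage) -> CachePage {
    let locked = page.lock();
    locked.set_dirty();
    locked.unlock()
}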

/// The state of a page in the page cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum PageState {

@@ -580,25 +330,26 @@ impl AtomicPageState {
/// This trait represents the backend for the page cache.
pub trait PageCacheBackend: Sync + Send {
    /// Reads a page from the backend asynchronously.
    fn read_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn read_page_async(&self, idx: usize, frame: LockedCachePage) -> Result<BioWaiter>;
    /// Writes a page to the backend asynchronously.
    fn write_page_async(&self, idx: usize, frame: &CachePage) -> Result<BioWaiter>;
    fn write_page_async(&self, idx: usize, frame: LockedCachePage) -> Result<BioWaiter>;
    /// Returns the number of pages in the backend.
    fn npages(&self) -> usize;
}
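
// Illustrative sketch (hypothetical type, using the new `LockedCachePage`
// signatures): a trivial backend that "completes" immediately, mirroring the
// RamFs case mentioned earlier where no bio request is issued and the page
// state is published directly. Dropping `frame` releases the page lock.
struct TrivialBackend {
    npages: usize,
}

impl PageCacheBackend for TrivialBackend {
    fn read_page_async(&self, _idx: usize, frame: LockedCachePage) -> Result<BioWaiter> {
        // A real backend would issue a bio that fills `frame` here.
        frame.set_up_to_date();
        Ok(BioWaiter::new())
    }

    fn write_page_async(&self, _idx: usize, _frame: LockedCachePage) -> Result<BioWaiter> {
        Ok(BioWaiter::new())
    }

    fn npages(&self) -> usize {
        self.npages
    }
}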

impl dyn PageCacheBackend {
    /// Reads a page from the backend synchronously.
    fn read_page(&self, idx: usize, frame: &CachePage) -> Result<()> {
        let waiter = self.read_page_async(idx, frame)?;
    pub fn read_page(&self, idx: usize, page: LockedCachePage) -> Result<()> {
        let waiter = self.read_page_async(idx, page)?;
        match waiter.wait() {
            Some(BioStatus::Complete) => Ok(()),
            _ => return_errno!(Errno::EIO),
        }
    }

    /// Writes a page to the backend synchronously.
    fn write_page(&self, idx: usize, frame: &CachePage) -> Result<()> {
        let waiter = self.write_page_async(idx, frame)?;
    pub fn write_page(&self, idx: usize, page: LockedCachePage) -> Result<()> {
        let waiter = self.write_page_async(idx, page)?;
        match waiter.wait() {
            Some(BioStatus::Complete) => Ok(()),
            _ => return_errno!(Errno::EIO),
@@ -6,34 +6,45 @@
//! Virtual Memory Objects (VMOs).

use core::{
    ops::Range,
    cell::RefCell,
    ops::{Deref, Range},
    sync::atomic::{AtomicIsize, AtomicUsize, Ordering},
};

use align_ext::AlignExt;
use ostd::{
    mm::{
        FrameAllocOptions, UFrame, VmIo, VmIoFill, VmReader, VmWriter, io_util::HasVmReaderWriter,
    },
    mm::{VmIo, VmIoFill, VmReader, VmWriter, io_util::HasVmReaderWriter},
    task::disable_preempt,
};
use xarray::{Cursor, LockedXArray, XArray};

use crate::prelude::*;
use crate::{
    fs::utils::{CachePage, CachePageExt, LockedCachePage, PageCacheBackend},
    prelude::*,
};

mod options;
mod pager;
mod page_cache;

pub use options::VmoOptions;
pub use pager::Pager;

/// Virtual Memory Objects (VMOs) are a type of capability that represents a
/// range of memory pages.
/// Virtual Memory Objects (VMOs) represent contiguous ranges of virtual memory pages.
///
/// Broadly speaking, there are two types of VMO:
/// 1. File-backed VMO: the VMO backed by a file and resides in the page cache,
///    which includes a [`Pager`] to provide it with actual pages.
/// 2. Anonymous VMO: the VMO without a file backup, which does not have a `Pager`.
/// VMOs serve as the fundamental building blocks for memory management in Asterinas,
/// providing a unified interface for both anonymous (RAM-backed) and disk-backed memory.
///
/// # Types of VMOs
///
/// There are two primary types of VMOs, distinguished by their backing storage:
///
/// 1. **Anonymous VMO**: Backed purely by RAM with no persistent storage. Pages are
///    initially zero-filled and exist only in memory. These are typically used for
///    heap allocations, anonymous mappings, and stack memory.
///
/// 2. **Disk-backed VMO**: Associated with a disk-backed file through a [`PageCacheBackend`].
///    Pages are lazily loaded from the file on first access and can be written back
///    to storage. These VMOs integrate with the page cache for efficient file I/O
///    and memory-mapped files.
///
/// # Features
///

@@ -44,10 +55,25 @@ pub use pager::Pager;
/// * **Device driver support.** If specified upon creation, VMOs will be
///   backed by physically contiguous memory pages starting at a target address.
/// * **File system support.** By default, a VMO's memory pages are initially
///   all zeros. But if a VMO is attached to a pager (`Pager`) upon creation,
///   then its memory pages will be populated by the pager.
///   With this pager mechanism, file systems can easily implement page caches
///   with VMOs by attaching the VMOs to pagers backed by inodes.
///   all zeros. But if a VMO is attached to a backend ([`PageCacheBackend`]) upon creation,
///   then its memory pages will be populated by the backend.
///   With this backend mechanism, file systems can easily implement page caches
///   with VMOs by attaching the VMOs to backends backed by inodes.
///
/// # Concurrency Guarantees
///
/// A `Vmo` guarantees the correctness of each [`CachePage`]'s [`PageState`]
/// transitions (e.g., `Uninit` → `UpToDate` → `Dirty`). These transitions are
/// performed atomically under the page lock, ensuring that concurrent readers
/// and writers always observe a consistent page state.
///
/// However, a `Vmo` does **not** guarantee atomicity of the page *contents*
/// with respect to concurrent reads and writes. In particular, when a page is
/// mapped into user-space address space, the kernel cannot prevent data races
/// between concurrent user-space memory accesses and kernel-side I/O operations
/// (e.g., `read`/`write` system calls or page fault handling). Callers that
/// require stronger consistency guarantees must provide their own
/// synchronization (e.g., file locks or application-level mutexes).
///
/// # Examples
///

@@ -56,16 +82,21 @@ pub use pager::Pager;
/// # Implementation
///
/// `Vmo` provides high-level APIs for address space management by wrapping
/// around its low-level counterpart [`ostd::mm::UFrame`].
/// Compared with `UFrame`,
/// around its low-level counterpart [`CachePage`]. Compared with [`CachePage`],
/// `Vmo` is easier to use (by offering more powerful APIs) and
/// harder to misuse (thanks to its nature of being a capability).
///
/// [`PageState`]: crate::fs::utils::PageState
pub struct Vmo {
    pager: Option<Arc<dyn Pager>>,
    /// The backend that provides disk I/O operations, if any.
    ///
    /// Using `Weak` here is to avoid circular references in exfat file systems.
    /// We should avoid the circular reference by design, and then we can change this to `Arc`.
    backend: Option<Weak<dyn PageCacheBackend>>,
    /// Flags
    flags: VmoFlags,
    /// The virtual pages where the VMO resides.
    pages: XArray<UFrame>,
    pages: XArray<CachePage>,
    /// The size of the VMO.
    ///
    /// Note: This size may not necessarily match the size of the `pages`, but it is

@@ -84,6 +115,7 @@ pub struct Vmo {
impl Debug for Vmo {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("Vmo")
            .field("has_backend", &self.backend.is_some())
            .field("flags", &self.flags)
            .field("size", &self.size)
            .field("writable_mapping_status", &self.writable_mapping_status)

@@ -111,11 +143,22 @@ bitflags! {
/// The error type used for commit operations of [`Vmo`].
#[derive(Debug)]
pub enum VmoCommitError {
    /// Represents a general error raised during the commit operation.
    /// A general error occurred during the commit operation.
    Err(Error),
    /// Represents that the commit operation needs to perform an I/O operation on the
    /// wrapped index.
    /// The commit operation requires an I/O operation to read the page
    /// from the backend.
    ///
    /// The wrapped value is the page index.
    NeedIo(usize),
    /// Failed to lock the page because it's currently locked by another thread.
    ///
    /// Contains the page index and the page itself.
    LockPageFailed(usize, CachePage),
    /// The page exists but is not yet initialized.
    ///
    /// The caller should wait for initialization to complete.
    /// Contains the page index and the page.
    WaitUntilInit(usize, CachePage),
}
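
// Illustrative sketch (assumed context: `vmo: &Vmo`, `backend` obtained from
// the VMO's `PageCacheBackend`): how a caller reacts to each variant; this is
// a condensed form of the retry loops further down in this file.
fn react(vmo: &Vmo, backend: &dyn PageCacheBackend, offset: usize) -> Result<()> {
    match vmo.try_commit_page(offset) {
        Ok(_page) => Ok(()),
        Err(VmoCommitError::Err(e)) => Err(e),
        // Perform the blocking commit, then the caller retries the operation.
        Err(VmoCommitError::NeedIo(idx)) => vmo.commit_on(idx).map(|_| ()),
        // Initialize the page from the backend exactly once, then retry.
        Err(VmoCommitError::WaitUntilInit(idx, page)) => {
            page.ensure_init(|locked| backend.read_page(idx, locked))
        }
        // Fall back to a blocking page lock, then retry from the next page.
        Err(VmoCommitError::LockPageFailed(_idx, page)) => {
            let _locked = page.lock();
            Ok(())
        }
    }
}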

impl From<Error> for VmoCommitError {

@@ -130,84 +173,54 @@ impl From<ostd::Error> for VmoCommitError {
    }
}

bitflags! {
    /// Commit Flags.
    pub struct CommitFlags: u8 {
        /// Set this flag if the page will be completely overwritten.
        /// This flag contains the WILL_WRITE flag.
        const WILL_OVERWRITE = 1;
    }
}

impl CommitFlags {
    pub fn will_overwrite(&self) -> bool {
        self.contains(Self::WILL_OVERWRITE)
    }
}

impl Vmo {
    /// Prepares a new `UFrame` for the target index in pages, returns this new frame.
    /// Converts this VMO to a disk-backed VMO wrapper if it has a backend.
    ///
    /// This operation may involve I/O operations if the VMO is backed by a pager.
    fn prepare_page(&self, page_idx: usize, commit_flags: CommitFlags) -> Result<UFrame> {
        match &self.pager {
            None => Ok(FrameAllocOptions::new().alloc_frame()?.into()),
            Some(pager) => {
                if commit_flags.will_overwrite() {
                    pager.commit_overwrite(page_idx)
                } else {
                    pager.commit_page(page_idx)
                }
            }
        }
    /// Returns `None` if this is an anonymous VMO.
    pub fn as_disk_backed(&self) -> Option<DiskBackedVmo<'_>> {
        self.backend.as_ref().and_then(|weak_backend| {
            weak_backend
                .upgrade()
                .map(|backend| DiskBackedVmo { vmo: self, backend })
        })
    }

    /// Commits a page at a specific page index.
    ///
    /// This method may involve I/O operations if the VMO needs to fetch a page from
    /// the underlying page cache.
    pub fn commit_on(&self, page_idx: usize, commit_flags: CommitFlags) -> Result<UFrame> {
        let new_page = self.prepare_page(page_idx, commit_flags)?;
    pub fn commit_on(&self, page_idx: usize) -> Result<CachePage> {
        self.commit_on_internal(page_idx, false)
    }

    fn commit_on_internal(&self, page_idx: usize, will_overwrite: bool) -> Result<CachePage> {
        let mut locked_pages = self.pages.lock();
        if page_idx * PAGE_SIZE > self.size() {
            return_errno_with_message!(Errno::EINVAL, "the offset is outside the VMO");
        }

        let mut cursor = locked_pages.cursor_mut(page_idx as u64);
        if let Some(page) = cursor.load() {
            return Ok(page.clone());
        if let Some(disk_backed) = self.as_disk_backed() {
            disk_backed.commit_on(locked_pages, page_idx, will_overwrite)
        } else {
            let mut cursor = locked_pages.cursor_mut(page_idx as u64);
            if let Some(page) = cursor.load() {
                return Ok(page.clone());
            }

            let new_page = CachePage::alloc_zero()?;
            cursor.store(new_page.clone());

            Ok(new_page)
        }

        cursor.store(new_page.clone());
        Ok(new_page)
    }

    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, UFrame>,
    ) -> core::result::Result<UFrame, VmoCommitError> {
        if let Some(committed_page) = cursor.load() {
            return Ok(committed_page.clone());
        }

        if let Some(pager) = &self.pager {
            // FIXME: Here `Vmo` treat all instructions in `pager` as I/O instructions
            // since it needs to take the inner `Mutex` lock and users also cannot hold a
            // `SpinLock` to do such instructions. This workaround may introduce some performance
            // issues. In the future we should solve the redundancy of `Vmo` and the pagecache
            // make sure return such error when really needing I/Os.
            return Err(VmoCommitError::NeedIo(cursor.index() as usize));
        }

        let frame = self.commit_on(cursor.index() as usize, CommitFlags::empty())?;
        Ok(frame)
    }

    /// Commits the page corresponding to the target offset in the VMO.
    ///
    /// If the commit operation needs to perform I/O, it will return a [`VmoCommitError::NeedIo`].
    pub fn try_commit_page(&self, offset: usize) -> core::result::Result<UFrame, VmoCommitError> {
    pub fn try_commit_page(
        &self,
        offset: usize,
    ) -> core::result::Result<CachePage, VmoCommitError> {
        let page_idx = offset / PAGE_SIZE;
        if offset >= self.size() {
            return Err(VmoCommitError::Err(Error::with_message(

@@ -218,7 +231,30 @@ impl Vmo {

        let guard = disable_preempt();
        let mut cursor = self.pages.cursor(&guard, page_idx as u64);
        self.try_commit_with_cursor(&mut cursor)
        self.try_commit_with_cursor(&mut cursor, false)
            .map(|(_, page)| page)
    }

    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, CachePage>,
        will_overwrite: bool,
    ) -> core::result::Result<(usize, CachePage), VmoCommitError> {
        if let Some(disk_backed) = self.as_disk_backed() {
            if let Some((index, page)) =
                disk_backed.try_commit_with_cursor(cursor, will_overwrite)?
            {
                return Ok((index, page));
            }
        } else if let Some(page) = cursor.load() {
            let index = cursor.index() as usize;
            return Ok((index, page.clone()));
        }

        // Need to commit. Only Anonymous VMOs can reach here, because disk-backed VMOs will return
        // `Err` if the page is not loaded.
        let index = cursor.index() as usize;
        Ok((index, self.commit_on_internal(index, will_overwrite)?))
    }

    /// Traverses the indices within a specified range of a VMO sequentially.

@@ -230,11 +266,25 @@ impl Vmo {
    pub fn try_operate_on_range<F>(
        &self,
        range: &Range<usize>,
        mut operate: F,
        operate: F,
    ) -> core::result::Result<(), VmoCommitError>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>,
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        self.try_operate_on_range_internal(range, operate, false)
    }

    fn try_operate_on_range_internal<F>(
        &self,
        range: &Range<usize>,
        mut operate: F,
        will_overwrite: bool,
    ) -> core::result::Result<(), VmoCommitError>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        if range.end > self.size() {

@@ -248,224 +298,231 @@ impl Vmo {
        let guard = disable_preempt();
        let mut cursor = self.pages.cursor(&guard, page_idx_range.start as u64);
        for page_idx in page_idx_range {
            let mut commit_fn = || self.try_commit_with_cursor(&mut cursor);
            let mut commit_fn = || self.try_commit_with_cursor(&mut cursor, will_overwrite);
            operate(&mut commit_fn)?;
            cursor.next();
        }
        Ok(())
    }

    /// Traverses the indices within a specified range of a VMO sequentially.
    ///
    /// For each index position, you have the option to commit the page as well as
    /// perform other operations.
    ///
    /// This method may involve I/O operations if the VMO needs to fetch a page from
    /// the underlying page cache.
    fn operate_on_range<F>(
        &self,
        mut range: Range<usize>,
        mut operate: F,
        commit_flags: CommitFlags,
    ) -> Result<()>
    where
        F: FnMut(
            &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
    {
        'retry: loop {
            let res = self.try_operate_on_range(&range, &mut operate);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on(index, commit_flags)?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
            }
        }
    }

    /// Decommits a range of pages in the VMO.
    ///
    /// The range must be within the size of the VMO.
    ///
    /// The start and end addresses will be rounded down and up to page boundaries.
    pub fn decommit(&self, range: Range<usize>) -> Result<()> {
        let locked_pages = self.pages.lock();
        if range.end > self.size() {
            return_errno_with_message!(Errno::EINVAL, "operated range exceeds the vmo size");
        }

        self.decommit_pages(locked_pages, range)?;
        Ok(())
    }

    /// Reads the specified amount of buffer content starting from the target offset in the VMO.
    pub fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()> {
        let read_len = writer.avail().min(self.size().saturating_sub(offset));
        let read_range = offset..(offset + read_len);
        let mut read_offset = offset % PAGE_SIZE;

        let read =
            move |commit_fn: &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>| {
                let frame = commit_fn()?;
                frame
                    .reader()
                    .skip(read_offset)
                    .read_fallible(writer)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                read_offset = 0;
                Ok(())
            };

        self.operate_on_range(read_range, read, CommitFlags::empty())
    }

    /// Writes the specified amount of buffer content starting from the target offset in the VMO.
    pub fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()> {
        let write_len = reader.remain();
        let write_range = offset..(offset + write_len);
        let mut write_offset = offset % PAGE_SIZE;
        let mut write =
            move |commit_fn: &mut dyn FnMut() -> core::result::Result<UFrame, VmoCommitError>| {
                let frame = commit_fn()?;
                frame
                    .writer()
                    .skip(write_offset)
                    .write_fallible(reader)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                write_offset = 0;
                Ok(())
            };

        if write_range.len() < PAGE_SIZE {
            self.operate_on_range(write_range.clone(), write, CommitFlags::empty())?;
        } else {
            let temp = write_range.start + PAGE_SIZE - 1;
            let up_align_start = temp - temp % PAGE_SIZE;
            let down_align_end = write_range.end - write_range.end % PAGE_SIZE;
            if write_range.start != up_align_start {
                let head_range = write_range.start..up_align_start;
                self.operate_on_range(head_range, &mut write, CommitFlags::empty())?;
            }
            if up_align_start != down_align_end {
                let mid_range = up_align_start..down_align_end;
                self.operate_on_range(mid_range, &mut write, CommitFlags::WILL_OVERWRITE)?;
            }
            if down_align_end != write_range.end {
                let tail_range = down_align_end..write_range.end;
                self.operate_on_range(tail_range, &mut write, CommitFlags::empty())?;
            }
        }

        if let Some(pager) = &self.pager {
            let page_idx_range = get_page_idx_range(&write_range);
            for page_idx in page_idx_range {
                pager.update_page(page_idx)?;
            }
        }
        Ok(())
    }

    /// Clears the target range in current VMO by writing zeros.
    pub fn clear(&self, range: Range<usize>) -> Result<()> {
        let buffer = vec![0u8; range.end - range.start];
        let mut reader = VmReader::from(buffer.as_slice()).to_fallible();
        self.write(range.start, &mut reader)?;
        Ok(())
    }

    /// Returns the size of current VMO.
    pub fn size(&self) -> usize {
        self.size.load(Ordering::Acquire)
    }

    /// Resizes current VMO to target size.
    ///
    /// The VMO must be resizable.
    ///
    /// The new size will be rounded up to page boundaries.
    pub fn resize(&self, new_size: usize) -> Result<()> {
        assert!(self.flags.contains(VmoFlags::RESIZABLE));
        let new_size = new_size.align_up(PAGE_SIZE);

        let locked_pages = self.pages.lock();

        let old_size = self.size();
        if new_size == old_size {
            return Ok(());
        }

        self.size.store(new_size, Ordering::Release);

        if new_size < old_size {
            self.decommit_pages(locked_pages, new_size..old_size)?;
        }

        Ok(())
    }

    fn decommit_pages(
        &self,
        mut locked_pages: LockedXArray<UFrame>,
        range: Range<usize>,
    ) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);
        let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);

        let Some(pager) = &self.pager else {
            cursor.remove();
            while let Some(page_idx) = cursor.next_present()
                && page_idx < page_idx_range.end as u64
            {
                cursor.remove();
            }
            return Ok(());
        };

        let mut removed_page_idx = Vec::new();
        if cursor.remove().is_some() {
            removed_page_idx.push(page_idx_range.start);
        }
        while let Some(page_idx) = cursor.next_present()
            && page_idx < page_idx_range.end as u64
        {
            removed_page_idx.push(page_idx as usize);
            cursor.remove();
        }

        drop(locked_pages);

        for page_idx in removed_page_idx {
            pager.decommit_page(page_idx)?;
        }

        Ok(())
    }

    /// Returns the flags of current VMO.
    pub fn flags(&self) -> VmoFlags {
        self.flags
    }

    /// Replaces the page at the `page_idx` in the VMO with the input `page`.
    fn replace(&self, page: UFrame, page_idx: usize) -> Result<()> {
        let mut locked_pages = self.pages.lock();
        if page_idx >= self.size() / PAGE_SIZE {
            return_errno_with_message!(Errno::EINVAL, "the page index is outside of the vmo");
    /// Returns the status of writable mappings of the VMO.
    pub fn writable_mapping_status(&self) -> &WritableMappingStatus {
        // Currently, only VMOs used by `MemfdInode` (anonymous) track writable mapping status.
        // Disk-backed VMOs do not use this field.
        debug_assert!(!self.is_disk_backed());
        &self.writable_mapping_status
    }

    fn decommit_pages(
        &self,
        mut locked_pages: LockedXArray<CachePage>,
        range: Range<usize>,
    ) -> Result<()> {
        let page_idx_range = get_page_idx_range(&range);
        let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);

        loop {
            cursor.remove();
            let page_idx = cursor.next_present();
            if page_idx.is_none_or(|idx| idx >= page_idx_range.end as u64) {
                break;
            }
        }

        locked_pages.store(page_idx as u64, page);
        Ok(())
    }

    /// Returns the status of writable mappings of the VMO.
    pub fn writable_mapping_status(&self) -> &WritableMappingStatus {
        // Only writable file-backed mappings may need to be tracked.
        debug_assert!(self.pager.is_some());
        &self.writable_mapping_status
    /// Returns whether this VMO is disk-backed.
    fn is_disk_backed(&self) -> bool {
        self.backend.is_some()
    }
}

impl Vmo {
    /// Reads the specified amount of buffer content starting from the target offset in the VMO.
    pub fn read(&self, offset: usize, writer: &mut VmWriter) -> Result<()> {
        let read_len = writer.avail().min(self.size().saturating_sub(offset));
        let mut read_range = offset..(offset + read_len);
        let mut read_offset = offset % PAGE_SIZE;

        let mut read = move |commit_fn: &mut dyn FnMut() -> core::result::Result<
            (usize, CachePage),
            VmoCommitError,
        >| {
            let (_, page) = commit_fn()?;
            page.reader()
                .skip(read_offset)
                .read_fallible(writer)
                .map_err(|e| VmoCommitError::from(e.0))?;
            read_offset = 0;
            Ok(())
        };

        'retry: loop {
            let res = self.try_operate_on_range(&read_range, &mut read);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on(index)?;
                    read_range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::WaitUntilInit(index, cache_page)) => {
                    cache_page.ensure_init(|locked_page| {
                        self.as_disk_backed()
                            .unwrap()
                            .backend
                            .read_page(index, locked_page)
                    })?;
                    read_range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                _ => unreachable!(),
            }
        }
    }

    /// Writes the specified amount of buffer content starting from the target offset in the VMO.
    pub fn write(&self, offset: usize, reader: &mut VmReader) -> Result<()> {
        let write_len = reader.remain();
        let write_range = offset..(offset + write_len);
        let mut write_offset = offset % PAGE_SIZE;

        if !self.is_disk_backed() {
            // Fast path for anonymous VMOs
            let write = move |commit_fn: &mut dyn FnMut() -> core::result::Result<
                (usize, CachePage),
                VmoCommitError,
            >| {
                let (_, page) = commit_fn()?;
                page.writer()
                    .skip(write_offset)
                    .write_fallible(reader)
                    .map_err(|e| VmoCommitError::from(e.0))?;
                write_offset = 0;
                Ok(())
            };

            return self.write_on_range(
                write_range.clone(),
                write,
                Option::<fn(&LockedCachePage) -> Result<()>>::None,
                false,
            );
        }

        // Slow path for disk-backed VMOs (with dirty tracking)
        let reader = RefCell::new(reader);
        let write_offset = RefCell::new(write_offset);
        let mut write = |commit_fn: &mut dyn FnMut() -> core::result::Result<
            (usize, CachePage),
            VmoCommitError,
        >| {
            let (index, page) = commit_fn()?;
            let locked_page = page
                .try_lock()
                .ok_or_else(|| VmoCommitError::LockPageFailed(index, page))?;
            locked_page.set_dirty();
            locked_page
                .writer()
                .skip(*write_offset.borrow())
                .write_fallible(&mut reader.borrow_mut())
                .map_err(|e| VmoCommitError::from(e.0))?;
            *write_offset.borrow_mut() = 0;
            Ok(())
        };

        let mut fallback_write = |locked_page: &LockedCachePage| {
            locked_page.set_dirty();
            locked_page
                .writer()
                .skip(*write_offset.borrow())
                .write_fallible(&mut reader.borrow_mut())?;
            *write_offset.borrow_mut() = 0;
            Ok(())
        };

        if write_range.len() < PAGE_SIZE {
            self.write_on_range(write_range.clone(), write, Some(fallback_write), false)?;
        } else {
            // Split into head (unaligned), middle (aligned), and tail (unaligned)
            let temp = write_range.start + PAGE_SIZE - 1;
            let up_align_start = temp - temp % PAGE_SIZE;
            let down_align_end = write_range.end - write_range.end % PAGE_SIZE;

            if write_range.start != up_align_start {
                let head_range = write_range.start..up_align_start;
                self.write_on_range(head_range, &mut write, Some(&mut fallback_write), false)?;
            }
            if up_align_start != down_align_end {
                // Middle part is page-aligned and will be completely overwritten
                let mid_range = up_align_start..down_align_end;
                self.write_on_range(mid_range, &mut write, Some(&mut fallback_write), true)?;
            }
            if down_align_end != write_range.end {
                let tail_range = down_align_end..write_range.end;
                self.write_on_range(tail_range, &mut write, Some(&mut fallback_write), false)?;
            }
        }

        Ok(())
    }
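
    // Illustrative split for a disk-backed write (PAGE_SIZE = 4096): writing
    // 10_000 bytes at offset 1_000 covers the range 1_000..11_000 and is issued as
    //
    //     head: 1_000..4_096    unaligned, read-modify-write (will_overwrite = false)
    //     mid:  4_096..8_192    page-aligned, fully overwritten, no read needed
    //     tail: 8_192..11_000   unaligned, read-modify-write (will_overwrite = false)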
|
||||
|
||||
    fn write_on_range<F1, F2>(
        &self,
        mut range: Range<usize>,
        mut operate: F1,
        mut fallback: Option<F2>,
        will_overwrite: bool,
    ) -> Result<()>
    where
        F1: FnMut(
            &mut dyn FnMut() -> core::result::Result<(usize, CachePage), VmoCommitError>,
        ) -> core::result::Result<(), VmoCommitError>,
        F2: FnMut(&LockedCachePage) -> Result<()>,
    {
        'retry: loop {
            let res = self.try_operate_on_range_internal(&range, &mut operate, will_overwrite);
            match res {
                Ok(_) => return Ok(()),
                Err(VmoCommitError::Err(e)) => return Err(e),
                Err(VmoCommitError::NeedIo(index)) => {
                    self.commit_on_internal(index, will_overwrite)?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::WaitUntilInit(index, cache_page)) => {
                    cache_page.ensure_init(|locked_page| {
                        self.as_disk_backed()
                            .unwrap()
                            .backend
                            .read_page(index, locked_page)
                    })?;
                    range.start = index * PAGE_SIZE;
                    continue 'retry;
                }
                Err(VmoCommitError::LockPageFailed(index, cache_page)) => {
                    let Some(fallback) = &mut fallback else {
                        unreachable!()
                    };
                    let locked_page = cache_page.lock();
                    fallback(&locked_page)?;
                    range.start = (index + 1) * PAGE_SIZE;
                    continue 'retry;
                }
            }
        }
    }
}
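// The retry loop above is driven by `VmoCommitError`. Judging only from how the variants
// are matched in this hunk, the enum has roughly the following shape (a sketch inferred
// from the call sites, not the authoritative definition):
//
//     enum VmoCommitError {
//         /// A hard error; propagated to the caller as-is.
//         Err(Error),
//         /// The page at this index is absent and must be committed, possibly with I/O.
//         NeedIo(usize),
//         /// The page exists but is uninitialized; it must be read before use.
//         WaitUntilInit(usize, CachePage),
//         /// The page could not be locked without blocking; use the `fallback` path.
//         LockPageFailed(usize, CachePage),
//     }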
@@ -498,6 +555,169 @@ impl VmIoFill for Vmo {
    }
}
/// A wrapper around a disk-backed VMO that provides specialized operations.
///
/// This structure is created by calling [`Vmo::as_disk_backed()`] and provides
/// access to disk-backed-specific functionality, such as reading pages from the
/// backend storage and managing dirty pages.
pub struct DiskBackedVmo<'a> {
    vmo: &'a Vmo,
    backend: Arc<dyn PageCacheBackend>,
}
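// `Vmo::as_disk_backed()` is referenced below but not defined in this hunk. A minimal
// sketch of its assumed shape, inferred from the call sites (the `backend` field on `Vmo`
// is hypothetical): it yields `Some(DiskBackedVmo)` only when the VMO still has a live
// `PageCacheBackend`.
//
//     impl Vmo {
//         pub fn as_disk_backed(&self) -> Option<DiskBackedVmo<'_>> {
//             let backend = self.backend.as_ref()?.upgrade()?;
//             Some(DiskBackedVmo { vmo: self, backend })
//         }
//     }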
impl<'a> DiskBackedVmo<'a> {
    /// Commits a page at the given index for a disk-backed VMO.
    fn commit_on(
        &self,
        mut locked_pages: LockedXArray<'_, CachePage>,
        page_idx: usize,
        will_overwrite: bool,
    ) -> Result<CachePage> {
        let mut cursor = locked_pages.cursor_mut(page_idx as u64);
        if let Some(page) = cursor.load() {
            let page = page.clone();
            if self.backend.npages() > page_idx {
                drop(locked_pages);
                if !will_overwrite {
                    page.ensure_init(|locked_page| self.backend.read_page(page_idx, locked_page))?;
                }
            }

            return Ok(page);
        };

        // The page is not cached yet. If it is within the file bounds, it must be
        // read from the backend (unless it is about to be completely overwritten).
        if self.backend.npages() > page_idx {
            let new_page = CachePage::alloc_uninit()?;
            // Acquiring the lock on a freshly allocated page must succeed.
            let locked_page = new_page.try_lock().unwrap();

            cursor.store(locked_page.clone());

            drop(locked_pages);

            if will_overwrite {
                // The page will be completely overwritten, so there is no need to read it.
                Ok(locked_page.unlock())
            } else {
                // Read the page from the backend storage.
                self.backend.read_page(page_idx, locked_page)?;
                Ok(new_page)
            }
        } else {
            // The page is beyond the file bounds; treat it as a hole (zero-filled).
            let new_page = CachePage::alloc_zero()?;
            cursor.store(new_page.clone());

            Ok(new_page)
        }
    }
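    // In short, `commit_on` has three outcomes: an already cached page is returned directly
    // (initializing it first if it may be read); a missing page inside the file bounds is
    // allocated and filled from the backend, unless `will_overwrite` makes the read
    // unnecessary; and a missing page beyond the file bounds is a hole, so a zero-filled
    // page is stored without any I/O.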
    /// Attempts to commit a page using a cursor, without blocking on I/O.
    fn try_commit_with_cursor(
        &self,
        cursor: &mut Cursor<'_, CachePage>,
        will_overwrite: bool,
    ) -> core::result::Result<Option<(usize, CachePage)>, VmoCommitError> {
        let page_idx = cursor.index() as usize;

        let Some(page) = cursor.load() else {
            return Err(VmoCommitError::NeedIo(page_idx));
        };

        // If the page is within the file bounds, check whether it has been initialized.
        if !will_overwrite && self.backend.npages() > page_idx && page.is_uninit() {
            return Err(VmoCommitError::WaitUntilInit(page_idx, page.clone()));
        }

        Ok(Some((page_idx, page.clone())))
    }
    /// Collects dirty pages in the specified byte range.
    ///
    /// If `remove` is `true`, the pages will be removed from the XArray while
    /// being collected. Otherwise, the pages are only read.
    fn collect_dirty_pages(
        &self,
        range: &Range<usize>,
        remove: bool,
    ) -> Result<Vec<(usize, CachePage)>> {
        let mut locked_pages = self.vmo.pages.lock();
        if range.start > self.size() {
            return Ok(Vec::new());
        }

        let page_idx_range = get_page_idx_range(range);
        let npages = self.backend.npages();
        if page_idx_range.start >= npages {
            return Ok(Vec::new());
        }

        let mut dirty_pages = Vec::new();

        if remove {
            let mut cursor = locked_pages.cursor_mut(page_idx_range.start as u64);
            if let Some(page) = cursor.remove()
                && page.is_dirty()
            {
                dirty_pages.push((page_idx_range.start, page.clone()));
            }

            while let Some(page_idx) = cursor.next_present() {
                let page_idx = page_idx as usize;
                if page_idx >= page_idx_range.end || page_idx >= npages {
                    break;
                }

                let page = cursor.remove().unwrap();
                if page.is_dirty() {
                    dirty_pages.push((page_idx, page.clone()));
                }
            }
        } else {
            let mut cursor = locked_pages.cursor(page_idx_range.start as u64);
            if let Some(page) = cursor.load()
                && page.is_dirty()
            {
                dirty_pages.push((page_idx_range.start, page.clone()));
            }

            while let Some(page_idx) = cursor.next_present() {
                let page_idx = page_idx as usize;
                if page_idx >= page_idx_range.end || page_idx >= npages {
                    break;
                }

                let page = cursor.load().unwrap();
                if page.is_dirty() {
                    dirty_pages.push((page_idx, page.clone()));
                }
            }
        }

        Ok(dirty_pages)
    }
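    // Note: later in this commit, `flush_range` calls `collect_dirty_pages` with
    // `remove == false` (write back but keep the pages cached), while `discard_range`
    // passes `remove == true` (write back and drop the pages from the cache).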
    /// Writes back a collection of dirty pages to the backend storage.
    fn write_back_pages(&self, dirty_pages: Vec<(usize, CachePage)>) -> Result<()> {
        for (page_idx, page) in dirty_pages {
            let locked_page = page.lock();
            if locked_page.is_dirty() {
                self.backend.write_page(page_idx, locked_page)?;
            }
        }
        Ok(())
    }
}
impl Deref for DiskBackedVmo<'_> {
    type Target = Vmo;

    fn deref(&self) -> &Self::Target {
        self.vmo
    }
}
/// Gets the page index range that contains the given offset range of the VMO.
pub fn get_page_idx_range(vmo_offset_range: &Range<usize>) -> Range<usize> {
    let start = vmo_offset_range.start.align_down(PAGE_SIZE);
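// Worked example (assuming PAGE_SIZE == 4096, for illustration only): per the doc comment
// above, a byte range of 100..8292 aligns down to 0 and up to 12288, giving the page index
// range 0..3, i.e., pages 0, 1, and 2 overlap the byte range.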
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: MPL-2.0

use core::{ops::Range, sync::atomic::Ordering};

use align_ext::AlignExt;
use ostd::mm::VmIoFill;

use crate::{
    fs::utils::{PageCacheBackend, PageCacheOps},
    prelude::*,
    vm::vmo::{Vmo, VmoFlags, VmoOptions},
};

impl PageCacheOps for Vmo {
    fn with_capacity(capacity: usize, backend: Weak<dyn PageCacheBackend>) -> Result<Arc<Self>> {
        VmoOptions::new(capacity)
            .flags(VmoFlags::RESIZABLE)
            .backend(backend)
            .alloc()
    }
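    // A sketch of how a disk-backed file system might create its page cache with the method
    // above (`inode_size` and `weak_backend` are hypothetical placeholders; `weak_backend`
    // stands for a `Weak<dyn PageCacheBackend>` pointing at the inode):
    //
    //     let page_cache = <Vmo as PageCacheOps>::with_capacity(inode_size, weak_backend)?;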
    // TODO: This method also needs to unmap the decommitted pages from the page tables.
    fn resize(&self, new_size: usize, old_size: usize) -> Result<()> {
        assert!(self.flags.contains(VmoFlags::RESIZABLE));

        if new_size < old_size && !new_size.is_multiple_of(PAGE_SIZE) {
            let fill_zero_end = old_size.min(new_size.align_up(PAGE_SIZE));
            PageCacheOps::fill_zeros(self, new_size..fill_zero_end)?;
        }

        let new_size = new_size.align_up(PAGE_SIZE);

        let locked_pages = self.pages.lock();

        let old_size = self.size();
        if new_size == old_size {
            return Ok(());
        }

        self.size.store(new_size, Ordering::Release);

        if new_size < old_size {
            self.decommit_pages(locked_pages, new_size..old_size)?;
        }

        Ok(())
    }
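    // Worked example of shrinking (assuming PAGE_SIZE == 4096, for illustration only):
    // resizing from old_size == 10000 to new_size == 5000 first zeroes bytes 5000..8192
    // (the tail of the partially kept page), then rounds the new size up to 8192 and
    // decommits the cached pages beyond that page-aligned size.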
    fn flush_range(&self, range: Range<usize>) -> Result<()> {
        let Some(vmo) = self.as_disk_backed() else {
            return Ok(());
        };

        let dirty_pages = vmo.collect_dirty_pages(&range, false)?;
        vmo.write_back_pages(dirty_pages)
    }

    // TODO: This method also needs to unmap the discarded pages from the page tables.
    fn discard_range(&self, range: Range<usize>) -> Result<()> {
        let Some(vmo) = self.as_disk_backed() else {
            return Ok(());
        };

        let dirty_pages = vmo.collect_dirty_pages(&range, true)?;
        vmo.write_back_pages(dirty_pages)
    }
    fn fill_zeros(&self, range: Range<usize>) -> Result<()> {
        VmIoFill::fill_zeros(self, range.start, range.end - range.start)?;
        Ok(())
    }
}
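// A rough usage sketch of the trait methods above, as a file system might call them on an
// `Arc<Vmo>` page cache (the surrounding variable names are hypothetical):
//
//     // Truncate: adjust the cache to the new file size.
//     page_cache.resize(new_size, old_size)?;
//     // Sync: persist dirty pages in a range while keeping them cached.
//     page_cache.flush_range(0..new_size)?;
//     // Eviction: write back dirty pages in a range and drop them from the cache.
//     page_cache.discard_range(0..new_size)?;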