// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2018-2023 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/trace.h" #include /* * Swappable Temporary Memory * ========================== * * Online checking sometimes needs to be able to stage a large amount of data * in memory. This information might not fit in the available memory and it * doesn't all need to be accessible at all times. In other words, we want an * indexed data buffer to store data that can be paged out. * * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to * store our staging data. This file is not installed in the file descriptor * table so that user programs cannot access the data, which means that the * xfile must be freed with xfile_destroy. * * xfiles assume that the caller will handle all required concurrency * management; standard vfs locks (freezer and inode) are not taken. Reads * and writes are satisfied directly from the page cache. */ /* * xfiles must not be exposed to userspace and require upper layers to * coordinate access to the one handle returned by the constructor, so * establish a separate lock class for xfiles to avoid confusing lockdep. */ static struct lock_class_key xfile_i_mutex_key; /* * Create an xfile of the given size. The description will be used in the * trace output. */ int xfile_create( const char *description, loff_t isize, struct xfile **xfilep) { struct inode *inode; struct xfile *xf; int error; xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS); if (!xf) return -ENOMEM; xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE); if (IS_ERR(xf->file)) { error = PTR_ERR(xf->file); goto out_xfile; } inode = file_inode(xf->file); lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key); /* * We don't want to bother with kmapping data during repair, so don't * allow highmem pages to back this mapping. */ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); trace_xfile_create(xf); *xfilep = xf; return 0; out_xfile: kfree(xf); return error; } /* Close the file and release all resources. */ void xfile_destroy( struct xfile *xf) { struct inode *inode = file_inode(xf->file); trace_xfile_destroy(xf); lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key); fput(xf->file); kfree(xf); } /* * Load an object. Since we're treating this file as "memory", any error or * short IO is treated as a failure to allocate memory. */ int xfile_load( struct xfile *xf, void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); unsigned int pflags; if (count > MAX_RW_COUNT) return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) return -ENOMEM; trace_xfile_load(xf, pos, count); pflags = memalloc_nofs_save(); while (count > 0) { struct folio *folio; unsigned int len; unsigned int offset; if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_READ) < 0) break; if (!folio) { /* * No data stored at this offset, just zero the output * buffer until the next page boundary. */ len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos)); memset(buf, 0, len); } else { if (filemap_check_wb_err(inode->i_mapping, 0)) { folio_unlock(folio); folio_put(folio); break; } offset = offset_in_folio(folio, pos); len = min_t(ssize_t, count, folio_size(folio) - offset); memcpy(buf, folio_address(folio) + offset, len); folio_unlock(folio); folio_put(folio); } count -= len; pos += len; buf += len; } memalloc_nofs_restore(pflags); if (count) return -ENOMEM; return 0; } /* * Store an object. Since we're treating this file as "memory", any error or * short IO is treated as a failure to allocate memory. */ int xfile_store( struct xfile *xf, const void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); unsigned int pflags; if (count > MAX_RW_COUNT) return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) return -ENOMEM; trace_xfile_store(xf, pos, count); /* * Increase the file size first so that shmem_get_folio(..., SGP_CACHE), * actually allocates a folio instead of erroring out. */ if (pos + count > i_size_read(inode)) i_size_write(inode, pos + count); pflags = memalloc_nofs_save(); while (count > 0) { struct folio *folio; unsigned int len; unsigned int offset; if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE) < 0) break; if (filemap_check_wb_err(inode->i_mapping, 0)) { folio_unlock(folio); folio_put(folio); break; } offset = offset_in_folio(folio, pos); len = min_t(ssize_t, count, folio_size(folio) - offset); memcpy(folio_address(folio) + offset, buf, len); folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); count -= len; pos += len; buf += len; } memalloc_nofs_restore(pflags); if (count) return -ENOMEM; return 0; } /* Find the next written area in the xfile data for a given offset. */ loff_t xfile_seek_data( struct xfile *xf, loff_t pos) { loff_t ret; ret = vfs_llseek(xf->file, pos, SEEK_DATA); trace_xfile_seek_data(xf, pos, ret); return ret; } /* * Grab the (locked) folio for a memory object. The object cannot span a folio * boundary. Returns the locked folio if successful, NULL if there was no * folio or it didn't cover the range requested, or an ERR_PTR on failure. */ struct folio * xfile_get_folio( struct xfile *xf, loff_t pos, size_t len, unsigned int flags) { struct inode *inode = file_inode(xf->file); struct folio *folio = NULL; unsigned int pflags; int error; if (inode->i_sb->s_maxbytes - pos < len) return ERR_PTR(-ENOMEM); trace_xfile_get_folio(xf, pos, len); /* * Increase the file size first so that shmem_get_folio(..., SGP_CACHE), * actually allocates a folio instead of erroring out. */ if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode)) i_size_write(inode, pos + len); pflags = memalloc_nofs_save(); error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ); memalloc_nofs_restore(pflags); if (error) return ERR_PTR(error); if (!folio) return NULL; if (len > folio_size(folio) - offset_in_folio(folio, pos)) { folio_unlock(folio); folio_put(folio); return NULL; } if (filemap_check_wb_err(inode->i_mapping, 0)) { folio_unlock(folio); folio_put(folio); return ERR_PTR(-EIO); } /* * Mark the folio dirty so that it won't be reclaimed once we drop the * (potentially last) reference in xfile_put_folio. */ if (flags & XFILE_ALLOC) folio_mark_dirty(folio); return folio; } /* * Release the (locked) folio for a memory object. */ void xfile_put_folio( struct xfile *xf, struct folio *folio) { trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio)); folio_unlock(folio); folio_put(folio); } /* Discard the page cache that's backing a range of the xfile. */ void xfile_discard( struct xfile *xf, loff_t pos, u64 count) { trace_xfile_discard(xf, pos, count); shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1); }