// SPDX-License-Identifier: GPL-2.0-or-later /* kiocb-using read/write * * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include #include #include #include #include #include #include #include #include #include "internal.h" struct cachefiles_kiocb { struct kiocb iocb; refcount_t ki_refcnt; loff_t start; union { size_t skipped; size_t len; }; struct cachefiles_object *object; netfs_io_terminated_t term_func; void *term_func_priv; bool was_async; unsigned int inval_counter; /* Copy of cookie->inval_counter */ u64 b_writing; }; static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) { if (refcount_dec_and_test(&ki->ki_refcnt)) { cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq); fput(ki->iocb.ki_filp); kfree(ki); } } /* * Handle completion of a read from the cache. */ static void cachefiles_read_complete(struct kiocb *iocb, long ret) { struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); struct inode *inode = file_inode(ki->iocb.ki_filp); _enter("%ld", ret); if (ret < 0) trace_cachefiles_io_error(ki->object, inode, ret, cachefiles_trace_read_error); if (ki->term_func) { if (ret >= 0) { if (ki->object->cookie->inval_counter == ki->inval_counter) ki->skipped += ret; else ret = -ESTALE; } ki->term_func(ki->term_func_priv, ret, ki->was_async); } cachefiles_put_kiocb(ki); } /* * Initiate a read from the cache. 
*/
/*
 * @cres:           cache resources; supply the cache object and backing file
 * @start_pos:      file position at which the read starts
 * @iter:           destination iterator; its count is the length to read
 * @read_hole:      NETFS_READ_HOLE_IGNORE skips the SEEK_DATA probe;
 *                  NETFS_READ_HOLE_FAIL yields -ENODATA if the region holds no
 *                  data; otherwise any leading gap is zero-filled
 * @term_func:      optional completion callback
 * @term_func_priv: first argument for @term_func
 *
 * Returns 0, -EIOCBQUEUED if the read was queued, or a negative error.  A
 * positive byte count from the VFS is folded to 0.  On the presubmission
 * error path @term_func (if any) is called directly with the error, or with
 * the number of bytes zeroed if there was no error.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
			   loff_t start_pos,
			   struct iov_iter *iter,
			   enum netfs_read_from_hole read_hole,
			   netfs_io_terminated_t term_func,
			   void *term_func_priv)
{
	struct cachefiles_object *object;
	struct cachefiles_kiocb *ki;
	struct file *file;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter), skipped = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		goto presubmission_error;

	fscache_count_read();
	object = cachefiles_cres_object(cres);
	file = cachefiles_cres_file(cres);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	/* If the caller asked us to seek for data before doing the read, then
	 * we should do that now.  If we find a gap, we fill it with zeros.
	 */
	if (read_hole != NETFS_READ_HOLE_IGNORE) {
		loff_t off = start_pos, off2;

		/* A non-zero injected error stands in for the seek result. */
		off2 = cachefiles_inject_read_error();
		if (off2 == 0)
			off2 = vfs_llseek(file, off, SEEK_DATA);
		/* Any error other than -ENXIO aborts the read. */
		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
			skipped = 0;
			ret = off2;
			goto presubmission_error;
		}

		if (off2 == -ENXIO || off2 >= start_pos + len) {
			/* The region is beyond the EOF or there's no more data
			 * in the region, so clear the rest of the buffer and
			 * return success.
			 */
			ret = -ENODATA;
			if (read_hole == NETFS_READ_HOLE_FAIL)
				goto presubmission_error;

			iov_iter_zero(len, iter);
			skipped = len;
			ret = 0;
			goto presubmission_error;
		}

		/* Zero the gap between the start and the first data. */
		skipped = off2 - off;
		iov_iter_zero(skipped, iter);
	}

	ret = -ENOMEM;
	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

	/* Two refs: one is dropped at the end of this function and one by
	 * cachefiles_read_complete().
	 */
	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos + skipped;
	ki->iocb.ki_flags	= IOCB_DIRECT;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->skipped		= skipped;
	ki->object		= object;
	ki->inval_counter	= cres->inval_counter;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	/* The completion hook is only installed if there's a callback. */
	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_read_complete;

	/* These refs are released by cachefiles_put_kiocb(). */
	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_read_error();
	if (ret == 0)
		ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		/* Queued: the completion handler will be called later. */
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		/* Completed (or failed) inline: run the completion handler
		 * ourselves and report it as synchronous.
		 */
		ki->was_async = false;
		cachefiles_read_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	/* No kiocb exists here, so call the terminator directly. */
	if (term_func)
		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
	return ret;
}

/*
 * Query the occupancy of the cache in a region, returning where the next chunk
 * of data starts and how long it is.
*/
/*
 * @cres:        cache resources; supply the cache object and backing file
 * @start:       start of the region to examine
 * @len:         length of the region to examine
 * @granularity: minimum block size to round to; raised to the cache's bsize
 *               if that is larger
 * @_data_start: set to the rounded-up start of the data found, or -1
 * @_data_len:   set to the length of the data found, or 0
 *
 * Returns 0 if data was found, -ENODATA if the region holds no usable data
 * and -ENOBUFS if the cache is unavailable or a seek failed.
 */
static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
				      loff_t start, size_t len, size_t granularity,
				      loff_t *_data_start, size_t *_data_len)
{
	struct cachefiles_cache *cache;
	struct file *backing;
	loff_t data_pos, hole_pos;

	/* Preset the outputs to "nothing found" for all the failure exits. */
	*_data_start = -1;
	*_data_len = 0;

	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
		return -ENOBUFS;

	cache = cachefiles_cres_object(cres)->volume->cache;
	backing = cachefiles_cres_file(cres);
	granularity = max_t(size_t, cache->bsize, granularity);

	_enter("%pD,%li,%llx,%zx/%llx",
	       backing, file_inode(backing)->i_ino, start, len,
	       i_size_read(file_inode(backing)));

	/* Locate the first data at or after the start of the region. */
	data_pos = cachefiles_inject_read_error();
	if (!data_pos)
		data_pos = vfs_llseek(backing, start, SEEK_DATA);
	if (data_pos == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (data_pos < 0 && data_pos >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */
	if (round_up(data_pos, granularity) >= start + len)
		return -ENODATA; /* No data in range */

	/* Locate the hole that terminates that run of data. */
	hole_pos = cachefiles_inject_read_error();
	if (!hole_pos)
		hole_pos = vfs_llseek(backing, data_pos, SEEK_HOLE);
	if (hole_pos == -ENXIO)
		return -ENODATA; /* Beyond EOF */
	if (hole_pos < 0 && hole_pos >= (loff_t)-MAX_ERRNO)
		return -ENOBUFS; /* Error. */

	/* Round away partial blocks */
	data_pos = round_up(data_pos, granularity);
	hole_pos = round_down(hole_pos, granularity);
	if (hole_pos <= data_pos)
		return -ENODATA;

	*_data_start = data_pos;
	if (hole_pos <= start + len)
		*_data_len = hole_pos - data_pos;
	else
		*_data_len = len;
	return 0;
}

/*
 * Handle completion of a write to the cache.
*/
/*
 * @iocb: the kiocb embedded in a struct cachefiles_kiocb
 * @ret:  the result of the write; negative values are traced as I/O errors
 *
 * Undoes the b_writing accounting done at submission, sets
 * FSCACHE_COOKIE_HAVE_DATA on the cookie, calls the termination function if
 * there is one and drops one reference on the wrapper.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct cachefiles_object *object = ki->object;
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld", ret);

	/* Only done when completing asynchronously; the submitter clears
	 * was_async before calling us inline.
	 */
	if (ki->was_async)
		kiocb_end_write(iocb);

	if (ret < 0)
		trace_cachefiles_io_error(object, inode, ret,
					  cachefiles_trace_write_error);

	atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
	set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
	if (ki->term_func)
		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	cachefiles_put_kiocb(ki);
}

/*
 * Initiate a write to the cache.
 *
 * @object:         the cache object being written to
 * @file:           the backing file to write into
 * @start_pos:      file position at which the write starts
 * @iter:           source iterator; its count is the length to write
 * @term_func:      optional completion callback
 * @term_func_priv: first argument for @term_func
 *
 * Returns 0, -EIOCBQUEUED if the write was queued, or a negative error.  A
 * positive byte count from the VFS is folded to 0.  If the wrapper cannot be
 * allocated, @term_func (if any) is called with -ENOMEM.
 */
int __cachefiles_write(struct cachefiles_object *object,
		       struct file *file,
		       loff_t start_pos,
		       struct iov_iter *iter,
		       netfs_io_terminated_t term_func,
		       void *term_func_priv)
{
	struct cachefiles_cache *cache;
	struct cachefiles_kiocb *ki;
	unsigned int old_nofs;
	ssize_t ret;
	size_t len = iov_iter_count(iter);

	fscache_count_write();
	cache = object->volume->cache;

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file_inode(file)));

	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki) {
		if (term_func)
			term_func(term_func_priv, -ENOMEM, false);
		return -ENOMEM;
	}

	/* Two refs: one is dropped at the end of this function and one by
	 * cachefiles_write_complete().
	 */
	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos;
	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->object		= object;
	ki->start		= start_pos;
	ki->len			= len;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;
	/* Length in units of (1 << bshift), rounded up. */
	ki->b_writing		= (len + (1 << cache->bshift) - 1) >> cache->bshift;

	/* The completion hook is only installed if there's a callback. */
	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_write_complete;
	/* Subtracted again in cachefiles_write_complete(). */
	atomic_long_add(ki->b_writing, &cache->b_writing);

	/* These refs are released by cachefiles_put_kiocb(). */
	get_file(ki->iocb.ki_filp);
	cachefiles_grab_object(object, cachefiles_obj_get_ioreq);

	trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
	old_nofs = memalloc_nofs_save();
	ret = cachefiles_inject_write_error();
	if (ret == 0)
		ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		/* Queued: the completion handler will be called later. */
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		/* Completed (or failed) inline: run the completion handler
		 * ourselves and report it as synchronous.
		 */
		ki->was_async = false;
		cachefiles_write_complete(&ki->iocb, ret);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;
}

/*
 * Write to the cache on behalf of the netfs layer: wait for the cache to be
 * ready for writing, then hand over to __cachefiles_write().  If the cache
 * isn't available, the termination function (if any) is called with -ENOBUFS
 * and -ENOBUFS is returned.
 */
static int cachefiles_write(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv)
{
	if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
		if (term_func)
			term_func(term_func_priv, -ENOBUFS, false);
		/* NOTE(review): term_func_priv is handed to the tracepoint
		 * after term_func has run and is treated as a subrequest
		 * here - confirm this is safe for all callers.
		 */
		trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
		return -ENOBUFS;
	}

	return __cachefiles_write(cachefiles_cres_object(cres),
				  cachefiles_cres_file(cres),
				  start_pos, iter,
				  term_func, term_func_priv);
}

/*
 * Work out where a read should be satisfied from and, if need be, shorten it
 * so that it doesn't straddle a boundary between data and hole in the backing
 * file.
 *
 * @cres:      cache resources; supply the cookie, object and backing file
 * @start:     start of the proposed read
 * @_len:      length of the proposed read; may be reduced
 * @i_size:    the file size to judge @start against
 * @_flags:    subrequest flags; NETFS_SREQ_COPY_TO_CACHE may be set and
 *             NETFS_SREQ_ONDEMAND may be cleared
 * @netfs_ino: inode number, used for tracing only
 *
 * Returns NETFS_FILL_WITH_ZEROES, NETFS_READ_FROM_CACHE, NETFS_INVALID_READ
 * or, by default, NETFS_DOWNLOAD_FROM_SERVER.
 */
static inline enum netfs_io_source
cachefiles_do_prepare_read(struct netfs_cache_resources *cres,
			   loff_t start, size_t *_len, loff_t i_size,
			   unsigned long *_flags, ino_t netfs_ino)
{
	enum cachefiles_prepare_read_trace why;
	struct cachefiles_object *object = NULL;
	struct cachefiles_cache *cache;
	struct fscache_cookie *cookie = fscache_cres_cookie(cres);
	const struct cred *saved_cred;
	struct file *file = cachefiles_cres_file(cres);
	enum netfs_io_source ret = NETFS_DOWNLOAD_FROM_SERVER;
	size_t len = *_len;
	loff_t off, to;
	ino_t ino = file ? file_inode(file)->i_ino : 0;
	int rc;

	_enter("%zx @%llx/%llx", len, start, i_size);

	/* Nothing to fetch at or beyond i_size. */
	if (start >= i_size) {
		ret = NETFS_FILL_WITH_ZEROES;
		why = cachefiles_trace_read_after_eof;
		goto out_no_object;
	}

	/* The cookie says there's nothing to read: mark the data to be
	 * copied to the cache and, unless this is an on-demand read, skip
	 * probing the backing file.
	 */
	if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
		__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
		why = cachefiles_trace_read_no_data;
		if (!test_bit(NETFS_SREQ_ONDEMAND, _flags))
			goto out_no_object;
	}

	/* The object and the file may be being created in the background. */
	if (!file) {
		why = cachefiles_trace_read_no_file;
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
			goto out_no_object;
		file = cachefiles_cres_file(cres);
		if (!file)
			goto out_no_object;
		ino = file_inode(file)->i_ino;
	}

	object = cachefiles_cres_object(cres);
	cache = object->volume->cache;
	cachefiles_begin_secure(cache, &saved_cred);
retry:
	/* Find the first data at or after the start of the read. */
	off = cachefiles_inject_read_error();
	if (off == 0)
		off = vfs_llseek(file, start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO) {
			why = cachefiles_trace_read_seek_nxio;
			goto download_and_store;
		}
		trace_cachefiles_io_error(object, file_inode(file), off,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	/* No data anywhere in the requested range. */
	if (off >= start + len) {
		why = cachefiles_trace_read_found_hole;
		goto download_and_store;
	}

	/* The range starts in a hole: shorten the request to cover just the
	 * hole, rounded up to the cache block size.
	 */
	if (off > start) {
		off = round_up(off, cache->bsize);
		len = off - start;
		*_len = len;
		why = cachefiles_trace_read_found_part;
		goto download_and_store;
	}

	/* The range starts on data: find where that data ends. */
	to = cachefiles_inject_read_error();
	if (to == 0)
		to = vfs_llseek(file, start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), to,
					  cachefiles_trace_seek_error);
		why = cachefiles_trace_read_seek_error;
		goto out;
	}

	/* Shorten the request to the data present.  Round up if the request
	 * reaches i_size, otherwise round down to a whole block.
	 */
	if (to < start + len) {
		if (start + len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		len = to - start;
		*_len = len;
	}

	why = cachefiles_trace_read_have_data;
	ret = NETFS_READ_FROM_CACHE;
	goto out;

download_and_store:
	__set_bit(NETFS_SREQ_COPY_TO_CACHE, _flags);
	/* For an on-demand read, ask for the data and, if that succeeds,
	 * probe the file once more with the on-demand flag cleared.
	 */
	if (test_bit(NETFS_SREQ_ONDEMAND, _flags)) {
		rc = cachefiles_ondemand_read(object, start, len);
		if (!rc) {
			__clear_bit(NETFS_SREQ_ONDEMAND, _flags);
			goto retry;
		}
		ret = NETFS_INVALID_READ;
	}
out:
	cachefiles_end_secure(cache, saved_cred);
out_no_object:
	trace_cachefiles_prep_read(object, start, len, *_flags, ret, why, ino, netfs_ino);
	return ret;
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
						    unsigned long long i_size)
{
	return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
					  subreq->start, &subreq->len, i_size,
					  &subreq->flags, subreq->rreq->inode->i_ino);
}

/*
 * Prepare an on-demand read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_io_source
cachefiles_prepare_ondemand_read(struct netfs_cache_resources *cres,
				 loff_t start, size_t *_len, loff_t i_size,
				 unsigned long *_flags, ino_t ino)
{
	return cachefiles_do_prepare_read(cres, start, _len, i_size, _flags, ino);
}

/*
 * Prepare for a write to occur.
 *
 * @object:                the cache object being written to
 * @file:                  the backing file
 * @_start:                start of the write; must already be PAGE_SIZE aligned
 * @_len:                  length of the write; rounded up to PAGE_SIZE
 * @upper_len:             the largest length the caller will accept
 * @no_space_allocated_yet: skip probing the file and go straight to the
 *                          space check
 *
 * Returns -ENOBUFS if the request doesn't fit the alignment or length limits,
 * a seek error, -EIO if punching out a partial block fails, 0 if the region
 * can simply be overwritten, or otherwise the result of cachefiles_has_space().
 */
int __cachefiles_prepare_write(struct cachefiles_object *object,
			       struct file *file,
			       loff_t *_start, size_t *_len, size_t upper_len,
			       bool no_space_allocated_yet)
{
	struct cachefiles_cache *cache = object->volume->cache;
	loff_t start = *_start, pos;
	size_t len = *_len;
	int ret;

	/* Round to DIO size */
	start = round_down(*_start, PAGE_SIZE);
	if (start != *_start || *_len > upper_len) {
		/* Probably asked to cache a streaming write written into the
		 * pagecache when the cookie was temporarily out of service to
		 * culling.
		 */
		fscache_count_dio_misfit();
		return -ENOBUFS;
	}

	*_len = round_up(len, PAGE_SIZE);

	/* We need to work out whether there's sufficient disk space to perform
	 * the write - but we can skip that check if we have space already
	 * allocated.
	 */
	if (no_space_allocated_yet)
		goto check_space;

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_DATA);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		if (pos == -ENXIO)
			goto check_space; /* Unallocated tail */
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		goto check_space; /* Unallocated region */

	/* We have a block that's at least partially filled - if we're low on
	 * space, we need to see if it's fully allocated.  If it's not, we may
	 * want to cull it.
	 */
	if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				 cachefiles_has_space_check) == 0)
		return 0; /* Enough space to simply overwrite the whole block */

	pos = cachefiles_inject_read_error();
	if (pos == 0)
		pos = vfs_llseek(file, start, SEEK_HOLE);
	if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
		trace_cachefiles_io_error(object, file_inode(file), pos,
					  cachefiles_trace_seek_error);
		return pos;
	}
	if ((u64)pos >= (u64)start + *_len)
		return 0; /* Fully allocated */

	/* Partially allocated, but insufficient space: cull. */
	fscache_count_no_write_space();
	ret = cachefiles_inject_remove_error();
	if (ret == 0)
		ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
				    start, *_len);
	if (ret < 0) {
		trace_cachefiles_io_error(object, file_inode(file), ret,
					  cachefiles_trace_fallocate_error);
		cachefiles_io_error_obj(object,
					"CacheFiles: fallocate failed (%d)\n", ret);
		/* All fallocate failures are reported as -EIO. */
		ret = -EIO;
	}

	return ret;

check_space:
	return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
				    cachefiles_has_space_for_write);
}

/*
 * Prepare for a write on behalf of the netfs layer: make sure the backing
 * file is available (waiting for it if need be) and then call
 * __cachefiles_prepare_write() with the cache's credentials in force.
 * Returns -ENOBUFS if there's no backing file.  @i_size is not used here.
 */
static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
				    loff_t *_start, size_t *_len, size_t upper_len,
				    loff_t i_size, bool no_space_allocated_yet)
{
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	const struct cred *saved_cred;
	int ret;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return -ENOBUFS;
		if (!cachefiles_cres_file(cres))
			return -ENOBUFS;
	}

	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 _start, _len, upper_len,
					 no_space_allocated_yet);
	cachefiles_end_secure(cache, saved_cred);
	return ret;
}

/*
 * Set the limits on a write subrequest for the stream it belongs to and make
 * sure the backing file is available, failing the subrequest through
 * netfs_prepare_write_failed() if it isn't.
 */
static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];

	_enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);

	stream->sreq_max_len = MAX_RW_COUNT;
	stream->sreq_max_segs = BIO_MAX_VECS;

	if (!cachefiles_cres_file(cres)) {
		if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
			return netfs_prepare_write_failed(subreq);
		if (!cachefiles_cres_file(cres))
			return netfs_prepare_write_failed(subreq);
	}
}

/*
 * Issue a write subrequest to the cache.  The region is trimmed so that it
 * starts and ends on CACHEFILES_DIO_BLOCK_SIZE boundaries (or extended to the
 * end of the final block where the stream allows it), space is prepared with
 * __cachefiles_prepare_write() and the write is then handed to
 * cachefiles_write().  Every early exit terminates the subrequest through
 * netfs_write_subrequest_terminated().
 */
static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct cachefiles_object *object = cachefiles_cres_object(cres);
	struct cachefiles_cache *cache = object->volume->cache;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
	const struct cred *saved_cred;
	size_t off, pre, post, len = subreq->len;
	loff_t start = subreq->start;
	int ret;

	_enter("W=%x[%x] %llx-%llx",
	       wreq->debug_id, subreq->debug_index, start, start + len - 1);

	/* We need to start on the cache granularity boundary */
	off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (off) {
		pre = CACHEFILES_DIO_BLOCK_SIZE - off;
		if (pre >= len) {
			/* The whole subrequest lies within the skipped part,
			 * so terminate it with its full length.
			 */
			fscache_count_dio_misfit();
			netfs_write_subrequest_terminated(subreq, len, false);
			return;
		}
		/* Skip over the unaligned leading part. */
		subreq->transferred += pre;
		start += pre;
		len -= pre;
		iov_iter_advance(&subreq->io_iter, pre);
	}

	/* We also need to end on the cache granularity boundary */
	if (start + len == wreq->i_size) {
		size_t part = len % CACHEFILES_DIO_BLOCK_SIZE;
		size_t need = CACHEFILES_DIO_BLOCK_SIZE - part;

		/* Pad the final partial block out if the stream says the
		 * buffer can be extended that far.
		 */
		if (part && stream->submit_extendable_to >= need) {
			len += need;
			subreq->len += need;
			subreq->io_iter.count += need;
		}
	}

	post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1);
	if (post) {
		/* Drop the unaligned trailing part. */
		len -= post;
		if (len == 0) {
			fscache_count_dio_misfit();
			netfs_write_subrequest_terminated(subreq, post, false);
			return;
		}
		iov_iter_truncate(&subreq->io_iter, len);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_cache_prepare);
	cachefiles_begin_secure(cache, &saved_cred);
	ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
					 &start, &len, len, true);
	cachefiles_end_secure(cache, saved_cred);
	if (ret < 0) {
		netfs_write_subrequest_terminated(subreq, ret, false);
		return;
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_cache_write);
	/* NOTE(review): the write is issued at subreq->start rather than at
	 * the adjusted 'start', although the iterator has been advanced by
	 * 'pre' when the start was unaligned - confirm this is intended.
	 */
	cachefiles_write(&subreq->rreq->cache_resources,
			 subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

/*
 * Clean up an operation.
*/ static void cachefiles_end_operation(struct netfs_cache_resources *cres) { struct file *file = cachefiles_cres_file(cres); if (file) fput(file); fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end); } static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { .end_operation = cachefiles_end_operation, .read = cachefiles_read, .write = cachefiles_write, .issue_write = cachefiles_issue_write, .prepare_read = cachefiles_prepare_read, .prepare_write = cachefiles_prepare_write, .prepare_write_subreq = cachefiles_prepare_write_subreq, .prepare_ondemand_read = cachefiles_prepare_ondemand_read, .query_occupancy = cachefiles_query_occupancy, }; /* * Open the cache file when beginning a cache operation. */ bool cachefiles_begin_operation(struct netfs_cache_resources *cres, enum fscache_want_state want_state) { struct cachefiles_object *object = cachefiles_cres_object(cres); if (!cachefiles_cres_file(cres)) { cres->ops = &cachefiles_netfs_cache_ops; if (object->file) { spin_lock(&object->lock); if (!cres->cache_priv2 && object->file) cres->cache_priv2 = get_file(object->file); spin_unlock(&object->lock); } } if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) { pr_err("failed to get cres->file\n"); return false; } return true; }