// SPDX-License-Identifier: GPL-2.0-only /* * MMU-based software IOTLB. * * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved. * * Author: Xie Yongji * */ #include #include #include #include #include #include #include "iova_domain.h" static int vduse_iotlb_add_range(struct vduse_iova_domain *domain, u64 start, u64 last, u64 addr, unsigned int perm, struct file *file, u64 offset) { struct vdpa_map_file *map_file; int ret; map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC); if (!map_file) return -ENOMEM; map_file->file = get_file(file); map_file->offset = offset; ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last, addr, perm, map_file); if (ret) { fput(map_file->file); kfree(map_file); return ret; } return 0; } static void vduse_iotlb_del_range(struct vduse_iova_domain *domain, u64 start, u64 last) { struct vdpa_map_file *map_file; struct vhost_iotlb_map *map; while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) { map_file = (struct vdpa_map_file *)map->opaque; fput(map_file->file); kfree(map_file); vhost_iotlb_map_free(domain->iotlb, map); } } int vduse_domain_set_map(struct vduse_iova_domain *domain, struct vhost_iotlb *iotlb) { struct vdpa_map_file *map_file; struct vhost_iotlb_map *map; u64 start = 0ULL, last = ULLONG_MAX; int ret; spin_lock(&domain->iotlb_lock); vduse_iotlb_del_range(domain, start, last); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { map_file = (struct vdpa_map_file *)map->opaque; ret = vduse_iotlb_add_range(domain, map->start, map->last, map->addr, map->perm, map_file->file, map_file->offset); if (ret) goto err; } spin_unlock(&domain->iotlb_lock); return 0; err: vduse_iotlb_del_range(domain, start, last); spin_unlock(&domain->iotlb_lock); return ret; } void vduse_domain_clear_map(struct vduse_iova_domain *domain, struct vhost_iotlb *iotlb) { struct vhost_iotlb_map *map; u64 start = 0ULL, last = ULLONG_MAX; spin_lock(&domain->iotlb_lock); for (map = vhost_iotlb_itree_first(iotlb, start, last); map; map = vhost_iotlb_itree_next(map, start, last)) { vduse_iotlb_del_range(domain, map->start, map->last); } spin_unlock(&domain->iotlb_lock); } static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, u64 iova, u64 size, u64 paddr) { struct vduse_bounce_map *map; u64 last = iova + size - 1; while (iova <= last) { map = &domain->bounce_maps[iova >> PAGE_SHIFT]; if (!map->bounce_page) { map->bounce_page = alloc_page(GFP_ATOMIC); if (!map->bounce_page) return -ENOMEM; } map->orig_phys = paddr; paddr += PAGE_SIZE; iova += PAGE_SIZE; } return 0; } static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain, u64 iova, u64 size) { struct vduse_bounce_map *map; u64 last = iova + size - 1; while (iova <= last) { map = &domain->bounce_maps[iova >> PAGE_SHIFT]; map->orig_phys = INVALID_PHYS_ADDR; iova += PAGE_SIZE; } } static void do_bounce(phys_addr_t orig, void *addr, size_t size, enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(orig); unsigned int offset = offset_in_page(orig); struct page *page; unsigned int sz = 0; while (size) { sz = min_t(size_t, PAGE_SIZE - offset, size); page = pfn_to_page(pfn); if (dir == DMA_TO_DEVICE) memcpy_from_page(addr, page, offset, sz); else memcpy_to_page(page, offset, addr, sz); size -= sz; pfn++; addr += sz; offset = 0; } } static void vduse_domain_bounce(struct vduse_iova_domain *domain, dma_addr_t iova, size_t size, enum dma_data_direction dir) { struct vduse_bounce_map *map; struct page *page; unsigned int offset; void *addr; size_t sz; if (iova >= domain->bounce_size) return; while (size) { map = &domain->bounce_maps[iova >> PAGE_SHIFT]; offset = offset_in_page(iova); sz = min_t(size_t, PAGE_SIZE - offset, size); if (WARN_ON(!map->bounce_page || map->orig_phys == INVALID_PHYS_ADDR)) return; page = domain->user_bounce_pages ? map->user_bounce_page : map->bounce_page; addr = kmap_local_page(page); do_bounce(map->orig_phys + offset, addr + offset, sz, dir); kunmap_local(addr); size -= sz; iova += sz; } } static struct page * vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova) { u64 start = iova & PAGE_MASK; u64 last = start + PAGE_SIZE - 1; struct vhost_iotlb_map *map; struct page *page = NULL; spin_lock(&domain->iotlb_lock); map = vhost_iotlb_itree_first(domain->iotlb, start, last); if (!map) goto out; page = pfn_to_page((map->addr + iova - map->start) >> PAGE_SHIFT); get_page(page); out: spin_unlock(&domain->iotlb_lock); return page; } static struct page * vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova) { struct vduse_bounce_map *map; struct page *page = NULL; read_lock(&domain->bounce_lock); map = &domain->bounce_maps[iova >> PAGE_SHIFT]; if (domain->user_bounce_pages || !map->bounce_page) goto out; page = map->bounce_page; get_page(page); out: read_unlock(&domain->bounce_lock); return page; } static void vduse_domain_free_kernel_bounce_pages(struct vduse_iova_domain *domain) { struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; bounce_pfns = domain->bounce_size >> PAGE_SHIFT; for (pfn = 0; pfn < bounce_pfns; pfn++) { map = &domain->bounce_maps[pfn]; if (WARN_ON(map->orig_phys != INVALID_PHYS_ADDR)) continue; if (!map->bounce_page) continue; __free_page(map->bounce_page); map->bounce_page = NULL; } } int vduse_domain_add_user_bounce_pages(struct vduse_iova_domain *domain, struct page **pages, int count) { struct vduse_bounce_map *map; int i, ret; /* Now we don't support partial mapping */ if (count != (domain->bounce_size >> PAGE_SHIFT)) return -EINVAL; write_lock(&domain->bounce_lock); ret = -EEXIST; if (domain->user_bounce_pages) goto out; for (i = 0; i < count; i++) { map = &domain->bounce_maps[i]; if (map->bounce_page) { /* Copy kernel page to user page if it's in use */ if (map->orig_phys != INVALID_PHYS_ADDR) memcpy_to_page(pages[i], 0, page_address(map->bounce_page), PAGE_SIZE); } map->user_bounce_page = pages[i]; get_page(pages[i]); } domain->user_bounce_pages = true; ret = 0; out: write_unlock(&domain->bounce_lock); return ret; } void vduse_domain_remove_user_bounce_pages(struct vduse_iova_domain *domain) { struct vduse_bounce_map *map; unsigned long i, count; write_lock(&domain->bounce_lock); if (!domain->user_bounce_pages) goto out; count = domain->bounce_size >> PAGE_SHIFT; for (i = 0; i < count; i++) { struct page *page = NULL; map = &domain->bounce_maps[i]; if (WARN_ON(!map->user_bounce_page)) continue; /* Copy user page to kernel page if it's in use */ if (map->orig_phys != INVALID_PHYS_ADDR) { page = map->bounce_page; memcpy_from_page(page_address(page), map->user_bounce_page, 0, PAGE_SIZE); } put_page(map->user_bounce_page); map->user_bounce_page = NULL; } domain->user_bounce_pages = false; out: write_unlock(&domain->bounce_lock); } void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain) { if (!domain->bounce_map) return; spin_lock(&domain->iotlb_lock); if (!domain->bounce_map) goto unlock; vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1); domain->bounce_map = 0; unlock: spin_unlock(&domain->iotlb_lock); } static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain) { int ret = 0; if (domain->bounce_map) return 0; spin_lock(&domain->iotlb_lock); if (domain->bounce_map) goto unlock; ret = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1, 0, VHOST_MAP_RW, domain->file, 0); if (ret) goto unlock; domain->bounce_map = 1; unlock: spin_unlock(&domain->iotlb_lock); return ret; } static dma_addr_t vduse_domain_alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit) { unsigned long shift = iova_shift(iovad); unsigned long iova_len = iova_align(iovad, size) >> shift; unsigned long iova_pfn; iova_pfn = alloc_iova_fast(iovad, iova_len, limit >> shift, true); return (dma_addr_t)iova_pfn << shift; } static void vduse_domain_free_iova(struct iova_domain *iovad, dma_addr_t iova, size_t size) { unsigned long shift = iova_shift(iovad); unsigned long iova_len = iova_align(iovad, size) >> shift; free_iova_fast(iovad, iova >> shift, iova_len); } void vduse_domain_sync_single_for_device(struct vduse_iova_domain *domain, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { read_lock(&domain->bounce_lock); if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) vduse_domain_bounce(domain, dma_addr, size, DMA_TO_DEVICE); read_unlock(&domain->bounce_lock); } void vduse_domain_sync_single_for_cpu(struct vduse_iova_domain *domain, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir) { read_lock(&domain->bounce_lock); if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); read_unlock(&domain->bounce_lock); } dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct iova_domain *iovad = &domain->stream_iovad; unsigned long limit = domain->bounce_size - 1; phys_addr_t pa = page_to_phys(page) + offset; dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); if (!iova) return DMA_MAPPING_ERROR; if (vduse_domain_init_bounce_map(domain)) goto err; read_lock(&domain->bounce_lock); if (vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) goto err_unlock; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE); read_unlock(&domain->bounce_lock); return iova; err_unlock: read_unlock(&domain->bounce_lock); err: vduse_domain_free_iova(iovad, iova, size); return DMA_MAPPING_ERROR; } void vduse_domain_unmap_page(struct vduse_iova_domain *domain, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct iova_domain *iovad = &domain->stream_iovad; read_lock(&domain->bounce_lock); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE); vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size); read_unlock(&domain->bounce_lock); vduse_domain_free_iova(iovad, dma_addr, size); } void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs) { struct iova_domain *iovad = &domain->consistent_iovad; unsigned long limit = domain->iova_limit; dma_addr_t iova = vduse_domain_alloc_iova(iovad, size, limit); void *orig = alloc_pages_exact(size, flag); if (!iova || !orig) goto err; spin_lock(&domain->iotlb_lock); if (vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1, virt_to_phys(orig), VHOST_MAP_RW, domain->file, (u64)iova)) { spin_unlock(&domain->iotlb_lock); goto err; } spin_unlock(&domain->iotlb_lock); *dma_addr = iova; return orig; err: *dma_addr = DMA_MAPPING_ERROR; if (orig) free_pages_exact(orig, size); if (iova) vduse_domain_free_iova(iovad, iova, size); return NULL; } void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { struct iova_domain *iovad = &domain->consistent_iovad; struct vhost_iotlb_map *map; struct vdpa_map_file *map_file; phys_addr_t pa; spin_lock(&domain->iotlb_lock); map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr, (u64)dma_addr + size - 1); if (WARN_ON(!map)) { spin_unlock(&domain->iotlb_lock); return; } map_file = (struct vdpa_map_file *)map->opaque; fput(map_file->file); kfree(map_file); pa = map->addr; vhost_iotlb_map_free(domain->iotlb, map); spin_unlock(&domain->iotlb_lock); vduse_domain_free_iova(iovad, dma_addr, size); free_pages_exact(phys_to_virt(pa), size); } static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf) { struct vduse_iova_domain *domain = vmf->vma->vm_private_data; unsigned long iova = vmf->pgoff << PAGE_SHIFT; struct page *page; if (!domain) return VM_FAULT_SIGBUS; if (iova < domain->bounce_size) page = vduse_domain_get_bounce_page(domain, iova); else page = vduse_domain_get_coherent_page(domain, iova); if (!page) return VM_FAULT_SIGBUS; vmf->page = page; return 0; } static const struct vm_operations_struct vduse_domain_mmap_ops = { .fault = vduse_domain_mmap_fault, }; static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma) { struct vduse_iova_domain *domain = file->private_data; vm_flags_set(vma, VM_DONTDUMP | VM_DONTEXPAND); vma->vm_private_data = domain; vma->vm_ops = &vduse_domain_mmap_ops; return 0; } static int vduse_domain_release(struct inode *inode, struct file *file) { struct vduse_iova_domain *domain = file->private_data; spin_lock(&domain->iotlb_lock); vduse_iotlb_del_range(domain, 0, ULLONG_MAX); vduse_domain_remove_user_bounce_pages(domain); vduse_domain_free_kernel_bounce_pages(domain); spin_unlock(&domain->iotlb_lock); put_iova_domain(&domain->stream_iovad); put_iova_domain(&domain->consistent_iovad); vhost_iotlb_free(domain->iotlb); vfree(domain->bounce_maps); kfree(domain); return 0; } static const struct file_operations vduse_domain_fops = { .owner = THIS_MODULE, .mmap = vduse_domain_mmap, .release = vduse_domain_release, }; void vduse_domain_destroy(struct vduse_iova_domain *domain) { fput(domain->file); } struct vduse_iova_domain * vduse_domain_create(unsigned long iova_limit, size_t bounce_size) { struct vduse_iova_domain *domain; struct file *file; struct vduse_bounce_map *map; unsigned long pfn, bounce_pfns; int ret; bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT; if (iova_limit <= bounce_size) return NULL; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; domain->iotlb = vhost_iotlb_alloc(0, 0); if (!domain->iotlb) goto err_iotlb; domain->iova_limit = iova_limit; domain->bounce_size = PAGE_ALIGN(bounce_size); domain->bounce_maps = vzalloc(bounce_pfns * sizeof(struct vduse_bounce_map)); if (!domain->bounce_maps) goto err_map; for (pfn = 0; pfn < bounce_pfns; pfn++) { map = &domain->bounce_maps[pfn]; map->orig_phys = INVALID_PHYS_ADDR; } file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops, domain, O_RDWR); if (IS_ERR(file)) goto err_file; domain->file = file; rwlock_init(&domain->bounce_lock); spin_lock_init(&domain->iotlb_lock); init_iova_domain(&domain->stream_iovad, PAGE_SIZE, IOVA_START_PFN); ret = iova_domain_init_rcaches(&domain->stream_iovad); if (ret) goto err_iovad_stream; init_iova_domain(&domain->consistent_iovad, PAGE_SIZE, bounce_pfns); ret = iova_domain_init_rcaches(&domain->consistent_iovad); if (ret) goto err_iovad_consistent; return domain; err_iovad_consistent: put_iova_domain(&domain->stream_iovad); err_iovad_stream: fput(file); err_file: vfree(domain->bounce_maps); err_map: vhost_iotlb_free(domain->iotlb); err_iotlb: kfree(domain); return NULL; } int vduse_domain_init(void) { return iova_cache_get(); } void vduse_domain_exit(void) { iova_cache_put(); }