// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2014, The Linux Foundation. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/mem_encrypt.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/pgtable-prot.h>
#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/kfence.h>

struct page_change_data {
	pgprot_t set_mask;
	pgprot_t clear_mask;
};

bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);

bool can_set_direct_map(void)
{
	/*
	 * rodata_full, DEBUG_PAGEALLOC and a Realm guest all require the
	 * linear map to be mapped at page granularity, so that it is
	 * possible to protect/unprotect single pages.
	 *
	 * The KFENCE pool requires page-granular mapping if initialized late.
	 *
	 * Realms need to make pages shared/protected at page granularity.
	 */
	return rodata_full || debug_pagealloc_enabled() ||
	       arm64_kfence_can_set_direct_map() || is_realm_world();
}

static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
	struct page_change_data *cdata = data;
	pte_t pte = __ptep_get(ptep);

	pte = clear_pte_bit(pte, cdata->clear_mask);
	pte = set_pte_bit(pte, cdata->set_mask);

	__set_pte(ptep, pte);
	return 0;
}

/*
 * This function assumes that the range is mapped with PAGE_SIZE pages.
 */
static int __change_memory_common(unsigned long start, unsigned long size,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	struct page_change_data data;
	int ret;

	data.set_mask = set_mask;
	data.clear_mask = clear_mask;

	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
				  &data);

	/*
	 * If the memory is being made valid without changing any other bits
	 * then a TLBI isn't required as a non-valid entry cannot be cached in
	 * the TLB.
	 */
	if (pgprot_val(set_mask) != PTE_VALID || pgprot_val(clear_mask))
		flush_tlb_kernel_range(start, start + size);
	return ret;
}

static int change_memory_common(unsigned long addr, int numpages,
				pgprot_t set_mask, pgprot_t clear_mask)
{
	unsigned long start = addr;
	unsigned long size = PAGE_SIZE * numpages;
	unsigned long end = start + size;
	struct vm_struct *area;
	int i;

	if (!PAGE_ALIGNED(addr)) {
		start &= PAGE_MASK;
		end = start + size;
		WARN_ON_ONCE(1);
	}

	/*
	 * Kernel VA mappings are always live, and splitting live section
	 * mappings into page mappings may cause TLB conflicts. This means
	 * we have to ensure that changing the permission bits of the range
	 * we are operating on does not result in such splitting.
	 *
	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
	 * Those are guaranteed to consist entirely of page mappings, and
	 * splitting is never needed.
	 *
	 * So check whether the [addr, addr + size) interval is entirely
	 * covered by precisely one VM area that has the VM_ALLOC flag set.
	 */
	area = find_vm_area((void *)addr);
	if (!area ||
	    end > (unsigned long)kasan_reset_tag(area->addr) + area->size ||
	    !(area->flags & VM_ALLOC))
		return -EINVAL;

	if (!numpages)
		return 0;

	/*
	 * If we are manipulating read-only permissions, apply the same change
	 * to the linear mapping of the pages that back this VM area.
	 */
	if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
			    pgprot_val(clear_mask) == PTE_RDONLY)) {
		for (i = 0; i < area->nr_pages; i++) {
			__change_memory_common((u64)page_address(area->pages[i]),
					       PAGE_SIZE, set_mask, clear_mask);
		}
	}

	/*
	 * Get rid of potentially aliasing lazily unmapped vm areas that may
	 * have permissions set that deviate from the ones we are setting here.
	 */
	vm_unmap_aliases();

	return __change_memory_common(start, size, set_mask, clear_mask);
}
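
/*
 * Illustrative sketch (hypothetical helper, not referenced anywhere): the
 * checks in change_memory_common() mean callers must pass a range that is
 * fully covered by a single vmalloc/vmap area. A caller that temporarily
 * write-protects a one-page vmalloc buffer might look like this; with
 * rodata_full enabled, the linear-map alias of the buffer is updated too.
 */
static int __maybe_unused example_write_protect_buffer(void)
{
	void *buf = vmalloc(PAGE_SIZE);
	int ret;

	if (!buf)
		return -ENOMEM;

	/* Set PTE_RDONLY and clear PTE_WRITE on the vmalloc mapping. */
	ret = set_memory_ro((unsigned long)buf, 1);
	if (!ret)
		/* Restore write permission before freeing the buffer. */
		ret = set_memory_rw((unsigned long)buf, 1);

	vfree(buf);
	return ret;
}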

int set_memory_ro(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_RDONLY),
					__pgprot(PTE_WRITE));
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_WRITE),
					__pgprot(PTE_RDONLY));
}

int set_memory_nx(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_PXN),
					__pgprot(PTE_MAYBE_GP));
}

int set_memory_x(unsigned long addr, int numpages)
{
	return change_memory_common(addr, numpages,
					__pgprot(PTE_MAYBE_GP),
					__pgprot(PTE_PXN));
}

int set_memory_valid(unsigned long addr, int numpages, int enable)
{
	if (enable)
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					__pgprot(PTE_VALID),
					__pgprot(0));
	else
		return __change_memory_common(addr, PAGE_SIZE * numpages,
					__pgprot(0),
					__pgprot(PTE_VALID));
}

int set_direct_map_invalid_noflush(struct page *page)
{
	struct page_change_data data = {
		.set_mask = __pgprot(0),
		.clear_mask = __pgprot(PTE_VALID),
	};

	if (!can_set_direct_map())
		return 0;

	return apply_to_page_range(&init_mm,
				   (unsigned long)page_address(page),
				   PAGE_SIZE, change_page_range, &data);
}

int set_direct_map_default_noflush(struct page *page)
{
	struct page_change_data data = {
		.set_mask = __pgprot(PTE_VALID | PTE_WRITE),
		.clear_mask = __pgprot(PTE_RDONLY),
	};

	if (!can_set_direct_map())
		return 0;

	return apply_to_page_range(&init_mm,
				   (unsigned long)page_address(page),
				   PAGE_SIZE, change_page_range, &data);
}

static int __set_memory_enc_dec(unsigned long addr, int numpages, bool encrypt)
{
	unsigned long set_prot = 0, clear_prot = 0;
	phys_addr_t start, end;
	int ret;

	if (!is_realm_world())
		return 0;

	if (!__is_lm_address(addr))
		return -EINVAL;

	start = __virt_to_phys(addr);
	end = start + numpages * PAGE_SIZE;

	if (encrypt)
		clear_prot = PROT_NS_SHARED;
	else
		set_prot = PROT_NS_SHARED;

	/*
	 * Break the mapping before we make any changes to avoid stale TLB
	 * entries or Synchronous External Aborts caused by RIPAS_EMPTY.
	 */
	ret = __change_memory_common(addr, PAGE_SIZE * numpages,
				     __pgprot(set_prot),
				     __pgprot(clear_prot | PTE_VALID));

	if (ret)
		return ret;

	if (encrypt)
		ret = rsi_set_memory_range_protected(start, end);
	else
		ret = rsi_set_memory_range_shared(start, end);

	if (ret)
		return ret;

	return __change_memory_common(addr, PAGE_SIZE * numpages,
				      __pgprot(PTE_VALID),
				      __pgprot(0));
}

static int realm_set_memory_encrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, true);

	/*
	 * If the request to change state fails, then the only sensible course
	 * of action for the caller is to leak the memory.
	 */
	WARN(ret, "Failed to encrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

static int realm_set_memory_decrypted(unsigned long addr, int numpages)
{
	int ret = __set_memory_enc_dec(addr, numpages, false);

	WARN(ret, "Failed to decrypt memory, %d pages will be leaked",
	     numpages);

	return ret;
}

static const struct arm64_mem_crypt_ops realm_crypt_ops = {
	.encrypt = realm_set_memory_encrypted,
	.decrypt = realm_set_memory_decrypted,
};

int realm_register_memory_enc_ops(void)
{
	return arm64_mem_crypt_ops_register(&realm_crypt_ops);
}

int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (!can_set_direct_map())
		return 0;

	return set_memory_valid(addr, nr, valid);
}
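
/*
 * Illustrative sketch (hypothetical helper, not referenced anywhere): the
 * *_noflush helpers above deliberately skip TLB maintenance, so a caller
 * that temporarily removes a page from the linear map is expected to pair
 * them with an explicit flush_tlb_kernel_range(), roughly as follows.
 */
static int __maybe_unused example_hide_page_from_linear_map(struct page *page)
{
	unsigned long addr = (unsigned long)page_address(page);
	int ret;

	/* Clear PTE_VALID on the linear-map entry for this page... */
	ret = set_direct_map_invalid_noflush(page);
	if (ret)
		return ret;

	/* ...and invalidate any stale TLB entry ourselves. */
	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);

	/*
	 * While unmapped, any access through the linear map would fault.
	 * Restoring the default (valid, writable) entry needs no TLBI, as
	 * non-valid entries are never cached in the TLB.
	 */
	return set_direct_map_default_noflush(page);
}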

#ifdef CONFIG_DEBUG_PAGEALLOC
/*
 * Apart from the return value, this does the same thing as the new
 * set_direct_map_valid_noflush() function.
 *
 * Unify? Explain the conceptual differences?
 */
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (!can_set_direct_map())
		return;

	set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
#endif /* CONFIG_DEBUG_PAGEALLOC */

/*
 * This function is used to determine if a linear map page has been marked as
 * not-valid. Walk the page table and check the PTE_VALID bit.
 *
 * Because this is only called on the kernel linear map, p?d_sect() implies
 * p?d_present(). When debug_pagealloc is enabled, section mappings are
 * disabled.
 */
bool kernel_page_present(struct page *page)
{
	pgd_t *pgdp;
	p4d_t *p4dp;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep;
	unsigned long addr = (unsigned long)page_address(page);

	pgdp = pgd_offset_k(addr);
	if (pgd_none(READ_ONCE(*pgdp)))
		return false;

	p4dp = p4d_offset(pgdp, addr);
	if (p4d_none(READ_ONCE(*p4dp)))
		return false;

	pudp = pud_offset(p4dp, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return false;
	if (pud_sect(pud))
		return true;

	pmdp = pmd_offset(pudp, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return false;
	if (pmd_sect(pmd))
		return true;

	ptep = pte_offset_kernel(pmdp, addr);
	return pte_valid(__ptep_get(ptep));
}
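
/*
 * Illustrative sketch (hypothetical debug check, not referenced anywhere):
 * with a page-granular linear map, toggling a page with set_memory_valid(),
 * as __kernel_map_pages() does for DEBUG_PAGEALLOC, must be reflected by
 * kernel_page_present(), which walks the page tables rather than the TLB.
 */
static void __maybe_unused example_check_page_presence(struct page *page)
{
	unsigned long addr = (unsigned long)page_address(page);

	if (!can_set_direct_map())
		return;

	/* Clear PTE_VALID; __change_memory_common() performs the TLBI. */
	set_memory_valid(addr, 1, 0);
	WARN_ON(kernel_page_present(page));

	/* Set PTE_VALID again, leaving all other attributes untouched. */
	set_memory_valid(addr, 1, 1);
	WARN_ON(!kernel_page_present(page));
}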