// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2020-2024 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_bmap_util.h" #include "xfs_exchmaps.h" #include "xfs_exchrange.h" #include "xfs_ag.h" #include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/tempfile.h" #include "scrub/tempexch.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/xfblob.h" #include "scrub/iscan.h" #include "scrub/readdir.h" #include "scrub/reap.h" #include "scrub/findparent.h" #include "scrub/orphanage.h" #include "scrub/listxattr.h" /* * Directory Repair * ================ * * We repair directories by reading the directory data blocks looking for * directory entries that look salvageable (name passes verifiers, entry points * to a valid allocated inode, etc). Each entry worth salvaging is stashed in * memory, and the stashed entries are periodically replayed into a temporary * directory to constrain memory use. Batching the construction of the * temporary directory in this fashion reduces lock cycling of the directory * being repaired and the temporary directory, and will later become important * for parent pointer scanning. 
*
 * If parent pointers are enabled on this filesystem, we instead reconstruct
 * the directory by visiting each parent pointer of each file in the filesystem
 * and translating the relevant parent pointer records into dirents.  In this
 * case, it is advantageous to stash all directory entries created from parent
 * pointers for a single child file before replaying them into the temporary
 * directory.  To save memory, the live filesystem scan reuses the findparent
 * fields.  Directory repair chooses either parent pointer scanning or
 * directory entry salvaging, but not both.
 *
 * Directory entries added to the temporary directory do not elevate the link
 * counts of the inodes found.  When salvaging completes, the remaining stashed
 * entries are replayed to the temporary directory.  An atomic mapping exchange
 * is used to commit the new directory blocks to the directory being repaired.
 * This will disrupt readdir cursors.
 *
 * Locking Issues
 * --------------
 *
 * If /a, /a/b, and /c are all directories, the VFS does not take i_rwsem on
 * /a/b for a "mv /a/b /c/" operation.  This means that only b's ILOCK protects
 * b's dotdot update.  This is in contrast to every other dotdot update (link,
 * remove, mkdir).  If the repair code drops the ILOCK, it must either
 * revalidate the dotdot entry or use dirent hooks to capture updates from
 * other threads.
 */

/*
 * Values for xrep_dirent.action.  These select which operation is replayed
 * against the temporary directory for a stashed record.
 */

/* Create a dirent in the tempdir. */
#define XREP_DIRENT_ADD		(1)

/* Remove a dirent from the tempdir. */
#define XREP_DIRENT_REMOVE	(2)

/*
 * Directory entry to be restored in the new directory.
 *
 * One of these fixed-size records is appended to the dir_entries xfarray for
 * every stashed update; the name bytes themselves are stored separately in
 * the dir_names xfblob and located again through @name_cookie.
 */
struct xrep_dirent {
	/* Cookie for retrieval of the dirent name. */
	xfblob_cookie		name_cookie;

	/* Target inode number. */
	xfs_ino_t		ino;

	/* Length of the dirent name. */
	uint8_t			namelen;

	/* File type of the dirent. */
	uint8_t			ftype;

	/* XREP_DIRENT_{ADD,REMOVE} */
	uint8_t			action;
};

/*
 * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
 * before we write them to the temp dir.
*/ #define XREP_DIR_MAX_STASH_BYTES (PAGE_SIZE * 8) struct xrep_dir { struct xfs_scrub *sc; /* Fixed-size array of xrep_dirent structures. */ struct xfarray *dir_entries; /* Blobs containing directory entry names. */ struct xfblob *dir_names; /* Information for exchanging data forks at the end. */ struct xrep_tempexch tx; /* Preallocated args struct for performing dir operations */ struct xfs_da_args args; /* * Information used to scan the filesystem to find the inumber of the * dotdot entry for this directory. For directory salvaging when * parent pointers are not enabled, we use the findparent_* functions * on this object and access only the parent_ino field directly. * * When parent pointers are enabled, however, the pptr scanner uses the * iscan, hooks, lock, and parent_ino fields of this object directly. * @pscan.lock coordinates access to dir_entries, dir_names, * parent_ino, subdirs, dirents, and args. This reduces the memory * requirements of this structure. */ struct xrep_parent_scan_info pscan; /* * Context information for attaching this directory to the lost+found * if this directory does not have a parent. */ struct xrep_adoption adoption; /* How many subdirectories did we find? */ uint64_t subdirs; /* How many dirents did we find? */ unsigned int dirents; /* Should we move this directory to the orphanage? */ bool needs_adoption; /* Directory entry name, plus the trailing null. */ struct xfs_name xname; unsigned char namebuf[MAXNAMELEN]; }; /* Tear down all the incore stuff we created. */ static void xrep_dir_teardown( struct xfs_scrub *sc) { struct xrep_dir *rd = sc->buf; xrep_findparent_scan_teardown(&rd->pscan); xfblob_destroy(rd->dir_names); xfarray_destroy(rd->dir_entries); } /* Set up for a directory repair. 
*/ int xrep_setup_directory( struct xfs_scrub *sc) { struct xrep_dir *rd; int error; xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); error = xrep_orphanage_try_create(sc); if (error) return error; error = xrep_tempfile_create(sc, S_IFDIR); if (error) return error; rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS); if (!rd) return -ENOMEM; rd->sc = sc; rd->xname.name = rd->namebuf; sc->buf = rd; return 0; } /* * Look up the dotdot entry and confirm that it's really the parent. * Returns NULLFSINO if we don't know what to do. */ static inline xfs_ino_t xrep_dir_lookup_parent( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; xfs_ino_t ino; int error; error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL); if (error) return NULLFSINO; if (!xfs_verify_dir_ino(sc->mp, ino)) return NULLFSINO; error = xrep_findparent_confirm(sc, &ino); if (error) return NULLFSINO; return ino; } /* * Look up '..' in the dentry cache and confirm that it's really the parent. * Returns NULLFSINO if the dcache misses or if the hit is implausible. */ static inline xfs_ino_t xrep_dir_dcache_parent( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; xfs_ino_t parent_ino; int error; parent_ino = xrep_findparent_from_dcache(sc); if (parent_ino == NULLFSINO) return parent_ino; error = xrep_findparent_confirm(sc, &parent_ino); if (error) return NULLFSINO; return parent_ino; } /* Try to find the parent of the directory being repaired. */ STATIC int xrep_dir_find_parent( struct xrep_dir *rd) { xfs_ino_t ino; ino = xrep_findparent_self_reference(rd->sc); if (ino != NULLFSINO) { xrep_findparent_scan_finish_early(&rd->pscan, ino); return 0; } ino = xrep_dir_dcache_parent(rd); if (ino != NULLFSINO) { xrep_findparent_scan_finish_early(&rd->pscan, ino); return 0; } ino = xrep_dir_lookup_parent(rd); if (ino != NULLFSINO) { xrep_findparent_scan_finish_early(&rd->pscan, ino); return 0; } /* * A full filesystem scan is the last resort. 
On a busy filesystem, * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means * that we don't know what who the parent is, so we should return to * userspace. */ return xrep_findparent_scan(&rd->pscan); } /* * Decide if we want to salvage this entry. We don't bother with oversized * names or the dot entry. */ STATIC int xrep_dir_want_salvage( struct xrep_dir *rd, const char *name, int namelen, xfs_ino_t ino) { struct xfs_mount *mp = rd->sc->mp; /* No pointers to ourselves or to garbage. */ if (ino == rd->sc->ip->i_ino) return false; if (!xfs_verify_dir_ino(mp, ino)) return false; /* No weird looking names or dot entries. */ if (namelen >= MAXNAMELEN || namelen <= 0) return false; if (namelen == 1 && name[0] == '.') return false; if (!xfs_dir2_namecheck(name, namelen)) return false; return true; } /* * Remember that we want to create a dirent in the tempdir. These stashed * actions will be replayed later. */ STATIC int xrep_dir_stash_createname( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t ino) { struct xrep_dirent dirent = { .action = XREP_DIRENT_ADD, .ino = ino, .namelen = name->len, .ftype = name->type, }; int error; trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino); error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name); if (error) return error; return xfarray_append(rd->dir_entries, &dirent); } /* * Remember that we want to remove a dirent from the tempdir. These stashed * actions will be replayed later. */ STATIC int xrep_dir_stash_removename( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t ino) { struct xrep_dirent dirent = { .action = XREP_DIRENT_REMOVE, .ino = ino, .namelen = name->len, .ftype = name->type, }; int error; trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino); error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name); if (error) return error; return xfarray_append(rd->dir_entries, &dirent); } /* Allocate an in-core record to hold entries while we rebuild the dir data. 
*/ STATIC int xrep_dir_salvage_entry( struct xrep_dir *rd, unsigned char *name, unsigned int namelen, xfs_ino_t ino) { struct xfs_name xname = { .name = name, }; struct xfs_scrub *sc = rd->sc; struct xfs_inode *ip; unsigned int i = 0; int error = 0; if (xchk_should_terminate(sc, &error)) return error; /* * Truncate the name to the first character that would trip namecheck. * If we no longer have a name after that, ignore this entry. */ while (i < namelen && name[i] != 0 && name[i] != '/') i++; if (i == 0) return 0; xname.len = i; /* Ignore '..' entries; we already picked the new parent. */ if (xname.len == 2 && name[0] == '.' && name[1] == '.') { trace_xrep_dir_salvaged_parent(sc->ip, ino); return 0; } trace_xrep_dir_salvage_entry(sc->ip, &xname, ino); /* * Compute the ftype or dump the entry if we can't. We don't lock the * inode because inodes can't change type while we have a reference. */ error = xchk_iget(sc, ino, &ip); if (error) return 0; /* Don't mix metadata and regular directory trees. */ if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) { xchk_irele(sc, ip); return 0; } xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode); xchk_irele(sc, ip); return xrep_dir_stash_createname(rd, &xname, ino); } /* Record a shortform directory entry for later reinsertion. */ STATIC int xrep_dir_salvage_sf_entry( struct xrep_dir *rd, struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep) { xfs_ino_t ino; ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep); if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino)) return 0; return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino); } /* Record a regular directory entry for later reinsertion. 
*/ STATIC int xrep_dir_salvage_data_entry( struct xrep_dir *rd, struct xfs_dir2_data_entry *dep) { xfs_ino_t ino; ino = be64_to_cpu(dep->inumber); if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino)) return 0; return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino); } /* Try to recover block/data format directory entries. */ STATIC int xrep_dir_recover_data( struct xrep_dir *rd, struct xfs_buf *bp) { struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; unsigned int offset; unsigned int end; int error = 0; /* * Loop over the data portion of the block. * Each object is a real entry (dep) or an unused one (dup). */ offset = geo->data_entry_offset; end = min_t(unsigned int, BBTOB(bp->b_length), xfs_dir3_data_end_offset(geo, bp->b_addr)); while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; if (xchk_should_terminate(rd->sc, &error)) return error; /* Skip unused entries. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { offset += be16_to_cpu(dup->length); continue; } /* Don't walk off the end of the block. */ offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen); if (offset > end) break; /* Ok, let's save this entry. */ error = xrep_dir_salvage_data_entry(rd, dep); if (error) return error; } return 0; } /* Try to recover shortform directory entries. 
*/ STATIC int xrep_dir_recover_sf( struct xrep_dir *rd) { struct xfs_dir2_sf_hdr *hdr; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next; struct xfs_ifork *ifp; xfs_ino_t ino; unsigned char *end; int error = 0; ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK); hdr = ifp->if_data; end = (unsigned char *)ifp->if_data + ifp->if_bytes; ino = xfs_dir2_sf_get_parent_ino(hdr); trace_xrep_dir_salvaged_parent(rd->sc->ip, ino); sfep = xfs_dir2_sf_firstentry(hdr); while ((unsigned char *)sfep < end) { if (xchk_should_terminate(rd->sc, &error)) return error; next = xfs_dir2_sf_nextentry(rd->sc->mp, hdr, sfep); if ((unsigned char *)next > end) break; /* Ok, let's save this entry. */ error = xrep_dir_salvage_sf_entry(rd, hdr, sfep); if (error) return error; sfep = next; } return 0; } /* * Try to figure out the format of this directory from the data fork mappings * and the directory size. If we can be reasonably sure of format, we can be * more aggressive in salvaging directory entries. On return, @magic_guess * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format" * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory, * and 0 if we can't tell. */ STATIC void xrep_dir_guess_format( struct xrep_dir *rd, __be32 *magic_guess) { struct xfs_inode *dp = rd->sc->ip; struct xfs_mount *mp = rd->sc->mp; struct xfs_da_geometry *geo = mp->m_dir_geo; xfs_fileoff_t last; int error; ASSERT(xfs_has_crc(mp)); *magic_guess = 0; /* * If there's a single directory block and the directory size is * exactly one block, this has to be a single block format directory. */ error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize && dp->i_disk_size == geo->blksize) { *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); return; } /* * If the last extent before the leaf offset matches the directory * size and the directory size is larger than 1 block, this is a * data format directory. 
*/ last = geo->leafblk; error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(mp, last) > geo->blksize && XFS_FSB_TO_B(mp, last) == dp->i_disk_size) { *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC); return; } } /* Recover directory entries from a specific directory block. */ STATIC int xrep_dir_recover_dirblock( struct xrep_dir *rd, __be32 magic_guess, xfs_dablk_t dabno) { struct xfs_dir2_data_hdr *hdr; struct xfs_buf *bp; __be32 oldmagic; int error; /* * Try to read buffer. We invalidate them in the next step so we don't * bother to set a buffer type or ops. */ error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno, XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL); if (error || !bp) return error; hdr = bp->b_addr; oldmagic = hdr->magic; trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno, be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess)); /* * If we're sure of the block's format, proceed with the salvage * operation using the specified magic number. */ if (magic_guess) { hdr->magic = magic_guess; goto recover; } /* * If we couldn't guess what type of directory this is, then we will * only salvage entries from directory blocks that match the magic * number and pass verifiers. 
*/ switch (hdr->magic) { case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops)) goto out; if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL) goto out; break; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops)) goto out; if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL) goto out; break; default: goto out; } recover: error = xrep_dir_recover_data(rd, bp); out: hdr->magic = oldmagic; xfs_trans_brelse(rd->sc->tp, bp); return error; } static inline void xrep_dir_init_args( struct xrep_dir *rd, struct xfs_inode *dp, const struct xfs_name *name) { memset(&rd->args, 0, sizeof(struct xfs_da_args)); rd->args.geo = rd->sc->mp->m_dir_geo; rd->args.whichfork = XFS_DATA_FORK; rd->args.owner = rd->sc->ip->i_ino; rd->args.trans = rd->sc->tp; rd->args.dp = dp; if (!name) return; rd->args.name = name->name; rd->args.namelen = name->len; rd->args.filetype = name->type; rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name); } /* Replay a stashed createname into the temporary directory. */ STATIC int xrep_dir_replay_createname( struct xrep_dir *rd, const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t total) { struct xfs_scrub *sc = rd->sc; struct xfs_inode *dp = rd->sc->tempip; int error; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); error = xfs_dir_ino_validate(sc->mp, inum); if (error) return error; trace_xrep_dir_replay_createname(dp, name, inum); xrep_dir_init_args(rd, dp, name); rd->args.inumber = inum; rd->args.total = total; rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; return xfs_dir_createname_args(&rd->args); } /* Replay a stashed removename onto the temporary directory. 
*/ STATIC int xrep_dir_replay_removename( struct xrep_dir *rd, const struct xfs_name *name, xfs_extlen_t total) { struct xfs_inode *dp = rd->args.dp; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); xrep_dir_init_args(rd, dp, name); rd->args.op_flags = 0; rd->args.total = total; trace_xrep_dir_replay_removename(dp, name, 0); return xfs_dir_removename_args(&rd->args); } /* * Add this stashed incore directory entry to the temporary directory. * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and * must not be in transaction context. */ STATIC int xrep_dir_replay_update( struct xrep_dir *rd, const struct xfs_name *xname, const struct xrep_dirent *dirent) { struct xfs_mount *mp = rd->sc->mp; #ifdef DEBUG xfs_ino_t ino; #endif uint resblks; int error; resblks = xfs_link_space_res(mp, xname->len); error = xchk_trans_alloc(rd->sc, resblks); if (error) return error; /* Lock the temporary directory and join it to the transaction */ xrep_tempfile_ilock(rd->sc); xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0); switch (dirent->action) { case XREP_DIRENT_ADD: /* * Create a replacement dirent in the temporary directory. * Note that _createname doesn't check for existing entries. * There shouldn't be any in the temporary dir, but we'll * verify this in debug mode. */ #ifdef DEBUG error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino); if (error != -ENOENT) { ASSERT(error != -ENOENT); goto out_cancel; } #endif error = xrep_dir_replay_createname(rd, xname, dirent->ino, resblks); if (error) goto out_cancel; if (xname->type == XFS_DIR3_FT_DIR) rd->subdirs++; rd->dirents++; break; case XREP_DIRENT_REMOVE: /* * Remove a dirent from the temporary directory. Note that * _removename doesn't check the inode target of the exist * entry. There should be a perfect match in the temporary * dir, but we'll verify this in debug mode. 
*/ #ifdef DEBUG error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino); if (error) { ASSERT(error != 0); goto out_cancel; } if (ino != dirent->ino) { ASSERT(ino == dirent->ino); error = -EIO; goto out_cancel; } #endif error = xrep_dir_replay_removename(rd, xname, resblks); if (error) goto out_cancel; if (xname->type == XFS_DIR3_FT_DIR) rd->subdirs--; rd->dirents--; break; default: ASSERT(0); error = -EIO; goto out_cancel; } /* Commit and unlock. */ error = xrep_trans_commit(rd->sc); if (error) return error; xrep_tempfile_iunlock(rd->sc); return 0; out_cancel: xchk_trans_cancel(rd->sc); xrep_tempfile_iunlock(rd->sc); return error; } /* * Flush stashed incore dirent updates that have been recorded by the scanner. * This is done to reduce the memory requirements of the directory rebuild, * since directories can contain up to 32GB of directory data. * * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir * IOLOCK. */ STATIC int xrep_dir_replay_updates( struct xrep_dir *rd) { xfarray_idx_t array_cur; int error; /* Add all the salvaged dirents to the temporary directory. */ mutex_lock(&rd->pscan.lock); foreach_xfarray_idx(rd->dir_entries, array_cur) { struct xrep_dirent dirent; error = xfarray_load(rd->dir_entries, array_cur, &dirent); if (error) goto out_unlock; error = xfblob_loadname(rd->dir_names, dirent.name_cookie, &rd->xname, dirent.namelen); if (error) goto out_unlock; rd->xname.type = dirent.ftype; mutex_unlock(&rd->pscan.lock); error = xrep_dir_replay_update(rd, &rd->xname, &dirent); if (error) return error; mutex_lock(&rd->pscan.lock); } /* Empty out both arrays now that we've added the entries. */ xfarray_truncate(rd->dir_entries); xfblob_truncate(rd->dir_names); mutex_unlock(&rd->pscan.lock); return 0; out_unlock: mutex_unlock(&rd->pscan.lock); return error; } /* * Periodically flush stashed directory entries to the temporary dir. 
This * is done to reduce the memory requirements of the directory rebuild, since * directories can contain up to 32GB of directory data. */ STATIC int xrep_dir_flush_stashed( struct xrep_dir *rd) { int error; /* * Entering this function, the scrub context has a reference to the * inode being repaired, the temporary file, and a scrub transaction * that we use during dirent salvaging to avoid livelocking if there * are cycles in the directory structures. We hold ILOCK_EXCL on both * the inode being repaired and the temporary file, though they are * not ijoined to the scrub transaction. * * To constrain kernel memory use, we occasionally write salvaged * dirents from the xfarray and xfblob structures into the temporary * directory in preparation for exchanging the directory structures at * the end. Updating the temporary file requires a transaction, so we * commit the scrub transaction and drop the two ILOCKs so that * we can allocate whatever transaction we want. * * We still hold IOLOCK_EXCL on the inode being repaired, which * prevents anyone from accessing the damaged directory data while we * repair it. */ error = xrep_trans_commit(rd->sc); if (error) return error; xchk_iunlock(rd->sc, XFS_ILOCK_EXCL); /* * Take the IOLOCK of the temporary file while we modify dirents. This * isn't strictly required because the temporary file is never revealed * to userspace, but we follow the same locking rules. We still hold * sc->ip's IOLOCK. */ error = xrep_tempfile_iolock_polled(rd->sc); if (error) return error; /* Write to the tempdir all the updates that we've stashed. */ error = xrep_dir_replay_updates(rd); xrep_tempfile_iounlock(rd->sc); if (error) return error; /* * Recreate the salvage transaction and relock the dir we're salvaging. */ error = xchk_trans_alloc(rd->sc, 0); if (error) return error; xchk_ilock(rd->sc, XFS_ILOCK_EXCL); return 0; } /* Decide if we've stashed too much dirent data in memory. 
*/ static inline bool xrep_dir_want_flush_stashed( struct xrep_dir *rd) { unsigned long long bytes; bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names); return bytes > XREP_DIR_MAX_STASH_BYTES; } /* Extract as many directory entries as we can. */ STATIC int xrep_dir_recover( struct xrep_dir *rd) { struct xfs_bmbt_irec got; struct xfs_scrub *sc = rd->sc; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; xfs_fileoff_t offset; xfs_dablk_t dabno; __be32 magic_guess; int nmap; int error; xrep_dir_guess_format(rd, &magic_guess); /* Iterate each directory data block in the data fork. */ for (offset = 0; offset < geo->leafblk; offset = got.br_startoff + got.br_blockcount) { nmap = 1; error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset, &got, &nmap, 0); if (error) return error; if (nmap != 1) return -EFSCORRUPTED; if (!xfs_bmap_is_written_extent(&got)) continue; for (dabno = round_up(got.br_startoff, geo->fsbcount); dabno < got.br_startoff + got.br_blockcount; dabno += geo->fsbcount) { if (xchk_should_terminate(rd->sc, &error)) return error; error = xrep_dir_recover_dirblock(rd, magic_guess, dabno); if (error) return error; /* Flush dirents to constrain memory usage. */ if (xrep_dir_want_flush_stashed(rd)) { error = xrep_dir_flush_stashed(rd); if (error) return error; } } } return 0; } /* * Find all the directory entries for this inode by scraping them out of the * directory leaf blocks by hand, and flushing them into the temp dir. */ STATIC int xrep_dir_find_entries( struct xrep_dir *rd) { struct xfs_inode *dp = rd->sc->ip; int error; /* * Salvage directory entries from the old directory, and write them to * the temporary directory. */ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) { error = xrep_dir_recover_sf(rd); } else { error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK); if (error) return error; error = xrep_dir_recover(rd); } if (error) return error; return xrep_dir_flush_stashed(rd); } /* Scan all files in the filesystem for dirents. 
*/
STATIC int
xrep_dir_salvage_entries(
	struct xrep_dir		*rd)
{
	struct xfs_scrub	*sc = rd->sc;
	int			error;

	/*
	 * Drop the ILOCK on this directory so that we can scan for this
	 * directory's parent.  Figure out who is going to be the parent of
	 * this directory, then retake the ILOCK so that we can salvage
	 * directory entries.  The ILOCK is retaken even if the search failed.
	 */
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	error = xrep_dir_find_parent(rd);
	xchk_ilock(sc, XFS_ILOCK_EXCL);

	/*
	 * Collect directory entries by parsing raw leaf blocks to salvage
	 * whatever we can.  When we're done, free the staging memory before
	 * exchanging the directories to reduce memory usage.
	 */
	if (!error)
		error = xrep_dir_find_entries(rd);
	if (error)
		return error;

	/*
	 * Cancel the repair transaction and drop the ILOCK so that we can
	 * (later) use the atomic mapping exchange functions to compute the
	 * correct block reservations and re-lock the inodes.
	 *
	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
	 * modifications, but there's nothing to prevent userspace from reading
	 * the directory until we're ready for the exchange operation.  Reads
	 * will return -EIO without shutting down the fs, so we're ok with
	 * that.
	 *
	 * The VFS can change dotdot on us, but the findparent scan will keep
	 * our incore parent inode up to date.  See the note on locking issues
	 * for more details.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	xchk_iunlock(sc, XFS_ILOCK_EXCL);
	return 0;
}

/*
 * Examine a parent pointer of a file.  If it leads us back to the directory
 * that we're rebuilding, create an incore dirent from the parent pointer and
 * stash it.
*/ STATIC int xrep_dir_scan_pptr( struct xfs_scrub *sc, struct xfs_inode *ip, unsigned int attr_flags, const unsigned char *name, unsigned int namelen, const void *value, unsigned int valuelen, void *priv) { struct xfs_name xname = { .name = name, .len = namelen, .type = xfs_mode_to_ftype(VFS_I(ip)->i_mode), }; xfs_ino_t parent_ino; uint32_t parent_gen; struct xrep_dir *rd = priv; int error; if (!(attr_flags & XFS_ATTR_PARENT)) return 0; /* * Ignore parent pointers that point back to a different dir, list the * wrong generation number, or are invalid. */ error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, valuelen, &parent_ino, &parent_gen); if (error) return error; if (parent_ino != sc->ip->i_ino || parent_gen != VFS_I(sc->ip)->i_generation) return 0; mutex_lock(&rd->pscan.lock); error = xrep_dir_stash_createname(rd, &xname, ip->i_ino); mutex_unlock(&rd->pscan.lock); return error; } /* * If this child dirent points to the directory being repaired, remember that * fact so that we can reset the dotdot entry if necessary. */ STATIC int xrep_dir_scan_dirent( struct xfs_scrub *sc, struct xfs_inode *dp, xfs_dir2_dataptr_t dapos, const struct xfs_name *name, xfs_ino_t ino, void *priv) { struct xrep_dir *rd = priv; /* Dirent doesn't point to this directory. */ if (ino != rd->sc->ip->i_ino) return 0; /* Ignore garbage inum. */ if (!xfs_verify_dir_ino(rd->sc->mp, ino)) return 0; /* No weird looking names. */ if (name->len >= MAXNAMELEN || name->len <= 0) return 0; /* Don't pick up dot or dotdot entries; we only want child dirents. */ if (xfs_dir2_samename(name, &xfs_name_dotdot) || xfs_dir2_samename(name, &xfs_name_dot)) return 0; trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot, dp->i_ino); xrep_findparent_scan_found(&rd->pscan, dp->i_ino); return 0; } /* * Decide if we want to look for child dirents or parent pointers in this file. * Skip the dir being repaired and any files being used to stage repairs. 
*/ static inline bool xrep_dir_want_scan( struct xrep_dir *rd, const struct xfs_inode *ip) { return ip != rd->sc->ip && !xrep_is_tempfile(ip); } /* * Take ILOCK on a file that we want to scan. * * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED. */ static inline unsigned int xrep_dir_scan_ilock( struct xrep_dir *rd, struct xfs_inode *ip) { uint lock_mode = XFS_ILOCK_SHARED; /* Need to take the shared ILOCK to advance the iscan cursor. */ if (!xrep_dir_want_scan(rd, ip)) goto lock; if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) { lock_mode = XFS_ILOCK_EXCL; goto lock; } if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af)) lock_mode = XFS_ILOCK_EXCL; lock: xfs_ilock(ip, lock_mode); return lock_mode; } /* * Scan this file for relevant child dirents or parent pointers that point to * the directory we're rebuilding. */ STATIC int xrep_dir_scan_file( struct xrep_dir *rd, struct xfs_inode *ip) { unsigned int lock_mode; int error = 0; lock_mode = xrep_dir_scan_ilock(rd, ip); if (!xrep_dir_want_scan(rd, ip)) goto scan_done; /* * If the extended attributes look as though they has been zapped by * the inode record repair code, we cannot scan for parent pointers. */ if (xchk_pptr_looks_zapped(ip)) { error = -EBUSY; goto scan_done; } error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_pptr, NULL, rd); if (error) goto scan_done; if (S_ISDIR(VFS_I(ip)->i_mode)) { /* * If the directory looks as though it has been zapped by the * inode record repair code, we cannot scan for child dirents. 
*/ if (xchk_dir_looks_zapped(ip)) { error = -EBUSY; goto scan_done; } error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd); if (error) goto scan_done; } scan_done: xchk_iscan_mark_visited(&rd->pscan.iscan, ip); xfs_iunlock(ip, lock_mode); return error; } /* * Scan all files in the filesystem for parent pointers that we can turn into * replacement dirents, and a dirent that we can use to set the dotdot pointer. */ STATIC int xrep_dir_scan_dirtree( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; struct xfs_inode *ip; int error; /* Roots of directory trees are their own parents. */ if (xchk_inode_is_dirtree_root(sc->ip)) xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino); /* * Filesystem scans are time consuming. Drop the directory ILOCK and * all other resources for the duration of the scan and hope for the * best. The live update hooks will keep our scan information up to * date even though we've dropped the locks. */ xchk_trans_cancel(sc); if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); error = xchk_trans_alloc_empty(sc); if (error) return error; while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) { bool flush; error = xrep_dir_scan_file(rd, ip); xchk_irele(sc, ip); if (error) break; /* Flush stashed dirent updates to constrain memory usage. */ mutex_lock(&rd->pscan.lock); flush = xrep_dir_want_flush_stashed(rd); mutex_unlock(&rd->pscan.lock); if (flush) { xchk_trans_cancel(sc); error = xrep_tempfile_iolock_polled(sc); if (error) break; error = xrep_dir_replay_updates(rd); xrep_tempfile_iounlock(sc); if (error) break; error = xchk_trans_alloc_empty(sc); if (error) break; } if (xchk_should_terminate(sc, &error)) break; } xchk_iscan_iter_finish(&rd->pscan.iscan); if (error) { /* * If we couldn't grab an inode that was busy with a state * change, change the error code so that we exit to userspace * as quickly as possible. 
*/ if (error == -EBUSY) return -ECANCELED; return error; } /* * Cancel the empty transaction so that we can (later) use the atomic * file mapping exchange functions to lock files and commit the new * directory. */ xchk_trans_cancel(rd->sc); return 0; } /* * Capture dirent updates being made by other threads which are relevant to the * directory being repaired. */ STATIC int xrep_dir_live_update( struct notifier_block *nb, unsigned long action, void *data) { struct xfs_dir_update_params *p = data; struct xrep_dir *rd; struct xfs_scrub *sc; int error = 0; rd = container_of(nb, struct xrep_dir, pscan.dhook.dirent_hook.nb); sc = rd->sc; /* * This thread updated a child dirent in the directory that we're * rebuilding. Stash the update for replay against the temporary * directory. */ if (p->dp->i_ino == sc->ip->i_ino && xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) { mutex_lock(&rd->pscan.lock); if (p->delta > 0) error = xrep_dir_stash_createname(rd, p->name, p->ip->i_ino); else error = xrep_dir_stash_removename(rd, p->name, p->ip->i_ino); mutex_unlock(&rd->pscan.lock); if (error) goto out_abort; } /* * This thread updated another directory's child dirent that points to * the directory that we're rebuilding, so remember the new dotdot * target. */ if (p->ip->i_ino == sc->ip->i_ino && xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) { if (p->delta > 0) { trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot, p->dp->i_ino); xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino); } else { trace_xrep_dir_stash_removename(sc->tempip, &xfs_name_dotdot, rd->pscan.parent_ino); xrep_findparent_scan_found(&rd->pscan, NULLFSINO); } } return NOTIFY_DONE; out_abort: xchk_iscan_abort(&rd->pscan.iscan); return NOTIFY_DONE; } /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode * joined to a clean scrub transaction. 
*/ STATIC int xrep_dir_reset_fork( struct xrep_dir *rd, xfs_ino_t parent_ino) { struct xfs_scrub *sc = rd->sc; struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK); int error; /* Unmap all the directory buffers. */ if (xfs_ifork_has_extents(ifp)) { error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK); if (error) return error; } trace_xrep_dir_reset_fork(sc->tempip, parent_ino); /* Reset the data fork to an empty data fork. */ xfs_idestroy_fork(ifp); ifp->if_bytes = 0; sc->tempip->i_disk_size = 0; /* Reinitialize the short form directory. */ xrep_dir_init_args(rd, sc->tempip, NULL); return xfs_dir2_sf_create(&rd->args, parent_ino); } /* * Prepare both inodes' directory forks for exchanging mappings. Promote the * tempfile from short format to leaf format, and if the file being repaired * has a short format data fork, turn it into an empty extent list. */ STATIC int xrep_dir_swap_prep( struct xfs_scrub *sc, bool temp_local, bool ip_local) { int error; /* * If the tempfile's directory is in shortform format, convert that to * a single leaf extent so that we can use the atomic mapping exchange. */ if (temp_local) { struct xfs_da_args args = { .dp = sc->tempip, .geo = sc->mp->m_dir_geo, .whichfork = XFS_DATA_FORK, .trans = sc->tp, .total = 1, .owner = sc->ip->i_ino, }; error = xfs_dir2_sf_to_block(&args); if (error) return error; /* * Roll the deferred log items to get us back to a clean * transaction. */ error = xfs_defer_finish(&sc->tp); if (error) return error; } /* * If the file being repaired had a shortform data fork, convert that * to an empty extent list in preparation for the atomic mapping * exchange. 
*/ if (ip_local) { struct xfs_ifork *ifp; ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); xfs_idestroy_fork(ifp); ifp->if_format = XFS_DINODE_FMT_EXTENTS; ifp->if_nextents = 0; ifp->if_bytes = 0; ifp->if_data = NULL; ifp->if_height = 0; xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA); } return 0; } /* * Replace the inode number of a directory entry. */ static int xrep_dir_replace( struct xrep_dir *rd, struct xfs_inode *dp, const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t total) { struct xfs_scrub *sc = rd->sc; int error; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); error = xfs_dir_ino_validate(sc->mp, inum); if (error) return error; xrep_dir_init_args(rd, dp, name); rd->args.inumber = inum; rd->args.total = total; return xfs_dir_replace_args(&rd->args); } /* * Reset the link count of this directory and adjust the unlinked list pointers * as needed. */ STATIC int xrep_dir_set_nlink( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; struct xfs_inode *dp = sc->ip; struct xfs_perag *pag; unsigned int new_nlink = min_t(unsigned long long, rd->subdirs + 2, XFS_NLINK_PINNED); int error; /* * The directory is not on the incore unlinked list, which means that * it needs to be reachable via the directory tree. Update the nlink * with our observed link count. If the directory has no parent, it * will be moved to the orphanage. */ if (!xfs_inode_on_unlinked_list(dp)) goto reset_nlink; /* * The directory is on the unlinked list and we did not find any * dirents. Set the link count to zero and let the directory * inactivate when the last reference drops. */ if (rd->dirents == 0) { rd->needs_adoption = false; new_nlink = 0; goto reset_nlink; } /* * The directory is on the unlinked list and we found dirents. This * directory needs to be reachable via the directory tree. Remove the * dir from the unlinked list and update nlink with the observed link * count. If the directory has no parent, it will be moved to the * orphanage. 
*/ pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino)); if (!pag) { ASSERT(0); return -EFSCORRUPTED; } error = xfs_iunlink_remove(sc->tp, pag, dp); xfs_perag_put(pag); if (error) return error; reset_nlink: if (VFS_I(dp)->i_nlink != new_nlink) set_nlink(VFS_I(dp), new_nlink); return 0; } /* * Finish replaying stashed dirent updates, allocate a transaction for * exchanging data fork mappings, and take the ILOCKs of both directories * before we commit the new directory structure. */ STATIC int xrep_dir_finalize_tempdir( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; int error; if (!xfs_has_parent(sc->mp)) return xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); /* * Repair relies on the ILOCK to quiesce all possible dirent updates. * Replay all queued dirent updates into the tempdir before exchanging * the contents, even if that means dropping the ILOCKs and the * transaction. */ do { error = xrep_dir_replay_updates(rd); if (error) return error; error = xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx); if (error) return error; if (xfarray_length(rd->dir_entries) == 0) break; xchk_trans_cancel(sc); xrep_tempfile_iunlock_both(sc); } while (!xchk_should_terminate(sc, &error)); return error; } /* Exchange the temporary directory's data fork with the one being repaired. */ STATIC int xrep_dir_swap( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; xfs_ino_t ino; bool ip_local, temp_local; int error = 0; /* * If we never found the parent for this directory, temporarily assign * the root dir as the parent; we'll move this to the orphanage after * exchanging the dir contents. We hold the ILOCK of the dir being * repaired, so we're not worried about racy updates of dotdot. */ ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); if (rd->pscan.parent_ino == NULLFSINO) { rd->needs_adoption = true; rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino; } /* * Reset the temporary directory's '..' entry to point to the parent * that we found. 
The dirent replace code asserts if the dirent * already points at the new inumber, so we look it up here. * * It's also possible that this replacement could also expand a sf * tempdir into block format. */ error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino); if (error) return error; if (rd->pscan.parent_ino != ino) { error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot, rd->pscan.parent_ino, rd->tx.req.resblks); if (error) return error; } /* * Changing the dot and dotdot entries could have changed the shape of * the directory, so we recompute these. */ ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL; temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL; /* * If the both files have a local format data fork and the rebuilt * directory data would fit in the repaired file's data fork, copy * the contents from the tempfile and update the directory link count. * We're done now. */ if (ip_local && temp_local && sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) { xrep_tempfile_copyout_local(sc, XFS_DATA_FORK); return xrep_dir_set_nlink(rd); } /* * Clean the transaction before we start working on exchanging * directory contents. */ error = xrep_tempfile_roll_trans(rd->sc); if (error) return error; /* Otherwise, make sure both data forks are in block-mapping mode. */ error = xrep_dir_swap_prep(sc, temp_local, ip_local); if (error) return error; /* * Set nlink of the directory in the same transaction sequence that * (atomically) commits the new directory data. */ error = xrep_dir_set_nlink(rd); if (error) return error; return xrep_tempexch_contents(sc, &rd->tx); } /* * Exchange the new directory contents (which we created in the tempfile) with * the directory being repaired. 
*/ STATIC int xrep_dir_rebuild_tree( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; int error; trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino); /* * Take the IOLOCK on the temporary file so that we can run dir * operations with the same locks held as we would for a normal file. * We still hold sc->ip's IOLOCK. */ error = xrep_tempfile_iolock_polled(rd->sc); if (error) return error; /* * Allocate transaction, lock inodes, and make sure that we've replayed * all the stashed dirent updates to the tempdir. After this point, * we're ready to exchange data fork mappings. */ error = xrep_dir_finalize_tempdir(rd); if (error) return error; if (xchk_iscan_aborted(&rd->pscan.iscan)) return -ECANCELED; /* * Exchange the tempdir's data fork with the file being repaired. This * recreates the transaction and re-takes the ILOCK in the scrub * context. */ error = xrep_dir_swap(rd); if (error) return error; /* * Release the old directory blocks and reset the data fork of the temp * directory to an empty shortform directory because inactivation does * nothing for directories. */ error = xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino); if (error) return error; /* * Roll to get a transaction without any inodes joined to it. Then we * can drop the tempfile's ILOCK and IOLOCK before doing more work on * the scrub target directory. */ error = xfs_trans_roll(&sc->tp); if (error) return error; xrep_tempfile_iunlock(sc); xrep_tempfile_iounlock(sc); return 0; } /* Set up the filesystem scan so we can regenerate directory entries. */ STATIC int xrep_dir_setup_scan( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; char *descr; int error; /* Set up some staging memory for salvaging dirents. 
*/ descr = xchk_xfile_ino_descr(sc, "directory entries"); error = xfarray_create(descr, 0, sizeof(struct xrep_dirent), &rd->dir_entries); kfree(descr); if (error) return error; descr = xchk_xfile_ino_descr(sc, "directory entry names"); error = xfblob_create(descr, &rd->dir_names); kfree(descr); if (error) goto out_xfarray; if (xfs_has_parent(sc->mp)) error = __xrep_findparent_scan_start(sc, &rd->pscan, xrep_dir_live_update); else error = xrep_findparent_scan_start(sc, &rd->pscan); if (error) goto out_xfblob; return 0; out_xfblob: xfblob_destroy(rd->dir_names); rd->dir_names = NULL; out_xfarray: xfarray_destroy(rd->dir_entries); rd->dir_entries = NULL; return error; } /* * Move the current file to the orphanage. * * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon * successful return, the scrub transaction will have enough extra reservation * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the * orphanage; and both inodes will be ijoined. */ STATIC int xrep_dir_move_to_orphanage( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; xfs_ino_t orig_parent, new_parent; int error; /* * We are about to drop the ILOCK on sc->ip to lock the orphanage and * prepare for the adoption. Therefore, look up the old dotdot entry * for sc->ip so that we can compare it after we re-lock sc->ip. */ error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent); if (error) return error; /* * Drop the ILOCK on the scrub target and commit the transaction. * Adoption computes its own resource requirements and gathers the * necessary components. */ error = xrep_trans_commit(sc); if (error) return error; xchk_iunlock(sc, XFS_ILOCK_EXCL); /* If we can take the orphanage's iolock then we're ready to move. */ if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { xchk_iunlock(sc, sc->ilock_flags); error = xrep_orphanage_iolock_two(sc); if (error) return error; } /* Grab transaction and ILOCK the two files. 
*/ error = xrep_adoption_trans_alloc(sc, &rd->adoption); if (error) return error; error = xrep_adoption_compute_name(&rd->adoption, &rd->xname); if (error) return error; /* * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot * entry again. If the parent changed or the child was unlinked while * the child directory was unlocked, we don't need to move the child to * the orphanage after all. */ error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent); if (error) return error; /* * Attach to the orphanage if we still have a linked directory and it * hasn't been moved. */ if (orig_parent == new_parent && VFS_I(sc->ip)->i_nlink > 0) { error = xrep_adoption_move(&rd->adoption); if (error) return error; } /* * Launder the scrub transaction so we can drop the orphanage ILOCK * and IOLOCK. Return holding the scrub target's ILOCK and IOLOCK. */ error = xrep_adoption_trans_roll(&rd->adoption); if (error) return error; xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); return 0; } /* * Repair the directory metadata. * * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer * cache in XFS can't handle aliased multiblock buffers, so this might * misbehave if the directory blocks are crosslinked with other filesystem * metadata. * * XXX: Is it necessary to check the dcache for this directory to make sure * that we always recreate every cached entry? */ int xrep_directory( struct xfs_scrub *sc) { struct xrep_dir *rd = sc->buf; int error; /* The rmapbt is required to reap the old data fork. */ if (!xfs_has_rmapbt(sc->mp)) return -EOPNOTSUPP; /* We require atomic file exchange range to rebuild anything. 
*/ if (!xfs_has_exchange_range(sc->mp)) return -EOPNOTSUPP; error = xrep_dir_setup_scan(rd); if (error) return error; if (xfs_has_parent(sc->mp)) error = xrep_dir_scan_dirtree(rd); else error = xrep_dir_salvage_entries(rd); if (error) goto out_teardown; /* Last chance to abort before we start committing fixes. */ if (xchk_should_terminate(sc, &error)) goto out_teardown; error = xrep_dir_rebuild_tree(rd); if (error) goto out_teardown; if (rd->needs_adoption) { if (!xrep_orphanage_can_adopt(rd->sc)) error = -EFSCORRUPTED; else error = xrep_dir_move_to_orphanage(rd); if (error) goto out_teardown; } out_teardown: xrep_dir_teardown(sc); return error; }