// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2023-2025 Christoph Hellwig.
 * Copyright (c) 2024-2025, Western Digital Corporation or its affiliates.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_rtbitmap.h"
#include "xfs_zone_alloc.h"
#include "xfs_zone_priv.h"
#include "xfs_zones.h"

/*
 * Note: the zoned allocator does not support a rtextsize > 1, so this code and
 * the allocator itself use file system blocks interchangeably with realtime
 * extents without doing the otherwise required conversions.
 */

/*
 * Per-task space reservation.
 *
 * Tasks that need to wait for GC to free up space allocate one of these
 * on-stack and add it to the per-mount zi_reclaim_reservations list.
 * The GC thread will then wake the tasks in order when space becomes
 * available.
 */
struct xfs_zone_reservation {
	struct list_head	entry;
	struct task_struct	*task;
	xfs_filblks_t		count_fsb;
};

/*
 * Calculate the number of reserved blocks.
 *
 * XC_FREE_RTEXTENTS counts the user-available capacity, up to which the file
 * system can be filled, while XC_FREE_RTAVAILABLE counts the blocks instantly
 * available for writes without waiting for GC.
 *
 * For XC_FREE_RTAVAILABLE only the smaller reservation required for GC and
 * block zeroing is excluded from the user capacity, while XC_FREE_RTEXTENTS
 * is further restricted by at least one zone as well as the optional
 * persistently reserved blocks.  This allows the allocator to run more
 * smoothly by not always triggering GC.
 */
uint64_t
xfs_zoned_default_resblks(
	struct xfs_mount	*mp,
	enum xfs_free_counter	ctr)
{
	switch (ctr) {
	case XC_FREE_RTEXTENTS:
		return (uint64_t)XFS_RESERVED_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks +
			mp->m_sb.sb_rtreserved;
	case XC_FREE_RTAVAILABLE:
		return (uint64_t)XFS_GC_ZONES *
			mp->m_groups[XG_TYPE_RTG].blocks;
	default:
		ASSERT(0);
		return 0;
	}
}

void
xfs_zoned_resv_wake_all(
	struct xfs_mount		*mp)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	spin_lock(&zi->zi_reservation_lock);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry)
		wake_up_process(reservation->task);
	spin_unlock(&zi->zi_reservation_lock);
}

void
xfs_zoned_add_available(
	struct xfs_mount		*mp,
	xfs_filblks_t			count_fsb)
{
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	*reservation;

	if (list_empty_careful(&zi->zi_reclaim_reservations)) {
		xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
		return;
	}

	spin_lock(&zi->zi_reservation_lock);
	xfs_add_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb);
	count_fsb = xfs_sum_freecounter(mp, XC_FREE_RTAVAILABLE);
	list_for_each_entry(reservation, &zi->zi_reclaim_reservations, entry) {
		if (reservation->count_fsb > count_fsb)
			break;
		wake_up_process(reservation->task);
		count_fsb -= reservation->count_fsb;
	}
	spin_unlock(&zi->zi_reservation_lock);
}

static int
xfs_zoned_space_wait_error(
	struct xfs_mount		*mp)
{
	if (xfs_is_shutdown(mp))
		return -EIO;
	if (fatal_signal_pending(current))
		return -EINTR;
	return 0;
}

static int
xfs_zoned_reserve_available(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	struct xfs_zone_reservation	reservation = {
		.task		= current,
		.count_fsb	= count_fsb,
	};
	int				error;

	/*
	 * If there are no waiters, try to directly grab the available blocks
	 * from the percpu counter.
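	 *
	 * xfs_dec_freecounter() does not sleep and reports a shortage as
	 * -ENOSPC, so this fast path either succeeds immediately or falls
	 * through to the waiting logic below.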
	 *
	 * If the caller wants to dip into the reserved pool also bypass the
	 * wait list.  This relies on the fact that we have a very generously
	 * sized reserved pool that always has enough space.  If the reserved
	 * allocations fail we're in trouble.
	 */
	if (likely(list_empty_careful(&zi->zi_reclaim_reservations) ||
	    (flags & XFS_ZR_RESERVED))) {
		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			return error;
	}

	if (flags & XFS_ZR_NOWAIT)
		return -EAGAIN;

	spin_lock(&zi->zi_reservation_lock);
	list_add_tail(&reservation.entry, &zi->zi_reclaim_reservations);
	while ((error = xfs_zoned_space_wait_error(mp)) == 0) {
		set_current_state(TASK_KILLABLE);

		error = xfs_dec_freecounter(mp, XC_FREE_RTAVAILABLE, count_fsb,
				flags & XFS_ZR_RESERVED);
		if (error != -ENOSPC)
			break;

		/*
		 * Make sure to start GC if it is not running already.  As we
		 * check the rtavailable count when filling up zones, GC is
		 * normally already running at this point, but in some setups
		 * with very few zones we may completely run out of non-
		 * reserved blocks in between filling zones.
		 */
		if (!xfs_is_zonegc_running(mp))
			wake_up_process(zi->zi_gc_thread);

		/*
		 * If there is no reclaimable group left and we aren't still
		 * processing a pending GC request, give up as we're fully out
		 * of space.
		 */
		if (!xfs_group_marked(mp, XG_TYPE_RTG, XFS_RTG_RECLAIMABLE) &&
		    !xfs_is_zonegc_running(mp))
			break;

		spin_unlock(&zi->zi_reservation_lock);
		schedule();
		spin_lock(&zi->zi_reservation_lock);
	}
	list_del(&reservation.entry);
	spin_unlock(&zi->zi_reservation_lock);

	__set_current_state(TASK_RUNNING);
	return error;
}

/*
 * Implement greedy space allocation for short writes by trying to grab all
 * that is left after locking out other threads from trying to do the same.
 *
 * This isn't exactly optimal and can hopefully be replaced by a proper
 * percpu_counter primitive one day.
 */
static int
xfs_zoned_reserve_extents_greedy(
	struct xfs_inode		*ip,
	xfs_filblks_t			*count_fsb,
	unsigned int			flags)
{
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_zone_info		*zi = mp->m_zone_info;
	s64				len = *count_fsb;
	int				error = -ENOSPC;

	spin_lock(&zi->zi_reservation_lock);
	len = min(len, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
	if (len > 0) {
		*count_fsb = len;
		error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, *count_fsb,
				flags & XFS_ZR_RESERVED);
	}
	spin_unlock(&zi->zi_reservation_lock);
	return error;
}

int
xfs_zoned_space_reserve(
	struct xfs_inode		*ip,
	xfs_filblks_t			count_fsb,
	unsigned int			flags,
	struct xfs_zone_alloc_ctx	*ac)
{
	struct xfs_mount		*mp = ip->i_mount;
	int				error;

	ASSERT(ac->reserved_blocks == 0);
	ASSERT(ac->open_zone == NULL);

	error = xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb,
			flags & XFS_ZR_RESERVED);
	if (error == -ENOSPC && (flags & XFS_ZR_GREEDY) && count_fsb > 1)
		error = xfs_zoned_reserve_extents_greedy(ip, &count_fsb, flags);
	if (error)
		return error;

	error = xfs_zoned_reserve_available(ip, count_fsb, flags);
	if (error) {
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, count_fsb);
		return error;
	}
	ac->reserved_blocks = count_fsb;
	return 0;
}

void
xfs_zoned_space_unreserve(
	struct xfs_inode		*ip,
	struct xfs_zone_alloc_ctx	*ac)
{
	if (ac->reserved_blocks > 0) {
		struct xfs_mount	*mp = ip->i_mount;

		xfs_zoned_add_available(mp, ac->reserved_blocks);
		xfs_add_freecounter(mp, XC_FREE_RTEXTENTS,
				ac->reserved_blocks);
	}
	if (ac->open_zone)
		xfs_open_zone_put(ac->open_zone);
}
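
/*
 * Illustrative usage sketch: how a caller in the write path might pair
 * xfs_zoned_space_reserve() with xfs_zoned_space_unreserve().  The flag
 * choice and the surrounding context (ip, count_fsb) are assumptions for
 * the example, not a description of any particular caller:
 *
 *	struct xfs_zone_alloc_ctx	ac = { };
 *	int				error;
 *
 *	error = xfs_zoned_space_reserve(ip, count_fsb, XFS_ZR_GREEDY, &ac);
 *	if (error)
 *		return error;
 *
 *	... issue the writes that consume the reservation, passing &ac as
 *	the allocation context ...
 *
 *	xfs_zoned_space_unreserve(ip, &ac);
 *
 * XFS_ZR_NOWAIT turns waiting for GC into an -EAGAIN return, and
 * XFS_ZR_RESERVED additionally allows dipping into the reserved pool.
 */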