// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2003 Bernardo Innocenti
 *
 * Based on former do_div() implementation from asm-parisc/div64.h:
 *	Copyright (C) 1999 Hewlett-Packard Co
 *	Copyright (C) 1999 David Mosberger-Tang
 *
 *
 * Generic C version of 64bit/32bit division and modulo, with
 * 64bit result and 32bit remainder.
 *
 * The fast case for (n>>32 == 0) is handled inline by do_div().
 *
 * Code generated for this function might be very inefficient
 * for some CPUs. __div64_32() can be overridden by linking arch-specific
 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
 * or by defining a preprocessor macro in arch/include/asm/div64.h.
 */

#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/math.h>
#include <linux/math64.h>
#include <linux/minmax.h>
#include <linux/log2.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32

#ifndef __div64_32
uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
{
	uint64_t rem = *n;
	uint64_t b = base;
	uint64_t res, d = 1;
	uint32_t high = rem >> 32;

	/* Reduce the thing a bit first */
	res = 0;
	if (high >= base) {
		high /= base;
		res = (uint64_t) high << 32;
		rem -= (uint64_t) (high*base) << 32;
	}

	while ((int64_t)b > 0 && b < rem) {
		b = b+b;
		d = d+d;
	}

	do {
		if (rem >= b) {
			rem -= b;
			res += d;
		}
		b >>= 1;
		d >>= 1;
	} while (d);

	*n = res;
	return rem;
}
EXPORT_SYMBOL(__div64_32);
#endif

#ifndef div_s64_rem
s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
{
	u64 quotient;

	if (dividend < 0) {
		quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
		*remainder = -*remainder;
		if (divisor > 0)
			quotient = -quotient;
	} else {
		quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
		if (divisor < 0)
			quotient = -quotient;
	}
	return quotient;
}
EXPORT_SYMBOL(div_s64_rem);
#endif
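/*
 * Usage sketch (illustrative only): the slow path above is what callers
 * reach through the do_div() macro, which overwrites its 64-bit dividend
 * with the quotient and evaluates to the 32-bit remainder:
 *
 *	u64 n = 1000000123;
 *	u32 rem = do_div(n, 1000000000);
 *
 * leaves n == 1 and rem == 123.  div_s64_rem() truncates toward zero and
 * gives the remainder the sign of the dividend, e.g. div_s64_rem(-7, 2, &r)
 * returns -3 with r == -1.
 */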
/*
 * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
 * @dividend:	64bit dividend
 * @divisor:	64bit divisor
 * @remainder:	64bit remainder
 *
 * This implementation is comparable to the algorithm used by div64_u64.
 * But this operation, which includes math for calculating the remainder,
 * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
 * systems.
 */
#ifndef div64_u64_rem
u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
{
	u32 high = divisor >> 32;
	u64 quot;

	if (high == 0) {
		u32 rem32;

		quot = div_u64_rem(dividend, divisor, &rem32);
		*remainder = rem32;
	} else {
		int n = fls(high);

		quot = div_u64(dividend >> n, divisor >> n);

		if (quot != 0)
			quot--;

		*remainder = dividend - quot * divisor;
		if (*remainder >= divisor) {
			quot++;
			*remainder -= divisor;
		}
	}

	return quot;
}
EXPORT_SYMBOL(div64_u64_rem);
#endif

/*
 * div64_u64 - unsigned 64bit divide with 64bit divisor
 * @dividend:	64bit dividend
 * @divisor:	64bit divisor
 *
 * This implementation is a modified version of the algorithm proposed
 * by the book 'Hacker's Delight'. The original source and full proof
 * can be found here and are available for use without restriction.
 *
 * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
 */
#ifndef div64_u64
u64 div64_u64(u64 dividend, u64 divisor)
{
	u32 high = divisor >> 32;
	u64 quot;

	if (high == 0) {
		quot = div_u64(dividend, divisor);
	} else {
		int n = fls(high);

		quot = div_u64(dividend >> n, divisor >> n);

		if (quot != 0)
			quot--;
		if ((dividend - quot * divisor) >= divisor)
			quot++;
	}

	return quot;
}
EXPORT_SYMBOL(div64_u64);
#endif
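/*
 * Worked example of the normalisation above (illustrative only): with
 * dividend = 2^32 and divisor = 2^32 + 1, high = 1 so n = fls(1) = 1.
 * The reduced divide div_u64(2^31, 2^31) returns 1, which is decremented
 * to a provisional quotient of 0; the correction step then sees
 * dividend - 0 * divisor = 2^32 < divisor, so no increment is applied and
 * the result is 0, matching the exact quotient of 2^32 / (2^32 + 1).
 */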
#ifndef div64_s64
s64 div64_s64(s64 dividend, s64 divisor)
{
	s64 quot, t;

	quot = div64_u64(abs(dividend), abs(divisor));
	t = (dividend ^ divisor) >> 63;

	return (quot ^ t) - t;
}
EXPORT_SYMBOL(div64_s64);
#endif

#endif /* BITS_PER_LONG == 32 */

/*
 * Iterative div/mod for use when dividend is not expected to be much
 * bigger than divisor.
 */
#ifndef iter_div_u64_rem
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	return __iter_div_u64_rem(dividend, divisor, remainder);
}
EXPORT_SYMBOL(iter_div_u64_rem);
#endif

#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)

#define mul_add(a, b, c)	add_u64_u32(mul_u32_u32(a, b), c)

#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)

static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
	/* native 64x64=128 bits multiplication */
	u128 prod = (u128)a * b + c;

	*p_lo = prod;
	return prod >> 64;
}

#else

static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
	/* perform a 64x64=128 bits multiplication in 32bit chunks */
	u64 x, y, z;

	/* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
	x = mul_add(a, b, c);
	y = mul_add(a, b >> 32, c >> 32);
	y = add_u64_u32(y, x >> 32);
	z = mul_add(a >> 32, b >> 32, y >> 32);
	y = mul_add(a >> 32, b, y);
	*p_lo = (y << 32) + (u32)x;
	return add_u64_u32(z, y >> 32);
}

#endif

#ifndef BITS_PER_ITER
#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif

#if BITS_PER_ITER == 32
#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
#define add_u64_long(a, b) ((a) + (b))
#else
#undef BITS_PER_ITER
#define BITS_PER_ITER 16
static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
{
	u64 n_lo = mul_add(a, b, c);
	u64 n_med = mul_add(a >> 32, b, c >> 32);

	n_med = add_u64_u32(n_med, n_lo >> 32);
	*p_lo = n_med << 32 | (u32)n_lo;
	return n_med >> 32;
}
#define add_u64_long(a, b) add_u64_u32(a, b)
#endif

u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
{
	unsigned long d_msig, q_digit;
	unsigned int reps, d_z_hi;
	u64 quotient, n_lo, n_hi;
	u32 overflow;

	n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);

	if (!n_hi)
		return div64_u64(n_lo, d);

	if (unlikely(n_hi >= d)) {
		/* trigger runtime exception if divisor is zero */
		if (d == 0) {
			unsigned long zero = 0;

			OPTIMIZER_HIDE_VAR(zero);
			return ~0UL/zero;
		}
		/* overflow: result is unrepresentable in a u64 */
		return ~0ULL;
	}

	/* Left align the divisor, shifting the dividend to match */
	d_z_hi = __builtin_clzll(d);
	if (d_z_hi) {
		d <<= d_z_hi;
		n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
		n_lo <<= d_z_hi;
	}

	reps = 64 / BITS_PER_ITER;
	/* Optimise loop count for small dividends */
	if (!(u32)(n_hi >> 32)) {
		reps -= 32 / BITS_PER_ITER;
		n_hi = n_hi << 32 | n_lo >> 32;
		n_lo <<= 32;
	}
#if BITS_PER_ITER == 16
	if (!(u32)(n_hi >> 48)) {
		reps--;
		n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
		n_lo <<= 16;
	}
#endif

	/* Invert the dividend so we can use add instead of subtract. */
	n_lo = ~n_lo;
	n_hi = ~n_hi;

	/*
	 * Get the most significant BITS_PER_ITER bits of the divisor.
	 * This is used to get a low 'guestimate' of the quotient digit.
	 */
	d_msig = (d >> (64 - BITS_PER_ITER)) + 1;

	/*
	 * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
	 * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
	 * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
	 */
	quotient = 0;
	while (reps--) {
		q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
		/* Shift 'n' left to align with the product q_digit * d */
		overflow = n_hi >> (64 - BITS_PER_ITER);
		n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
		n_lo <<= BITS_PER_ITER;
		/* Add the product q_digit * d to the negated dividend */
		overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
		/* Adjust for the q_digit 'guestimate' being low */
		while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
			q_digit++;
			n_hi += d;
			overflow += n_hi < d;
		}
		quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
	}

	/*
	 * The above only ensures the remainder doesn't overflow;
	 * it can still be possible to add (aka subtract) another copy
	 * of the divisor.
	 */
	if ((n_hi + d) > n_hi)
		quotient++;

	return quotient;
}
#if !defined(test_mul_u64_add_u64_div_u64)
EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
#endif

#endif
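/*
 * Usage sketch for mul_u64_add_u64_div_u64() (illustrative only, variable
 * names hypothetical): it evaluates (a * b + c) / d with a full 128-bit
 * intermediate, so scaling a large counter by a rational factor cannot
 * overflow the product.  Passing c == 0 truncates, c == d - 1 rounds up,
 * and c == d / 2 rounds to nearest:
 *
 *	u64 ns = mul_u64_add_u64_div_u64(cycles, mult, div / 2, div);
 */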