| /* |
| * QEMU float support |
| * |
| * The code in this source file is derived from release 2a of the SoftFloat |
| * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and |
| * some later contributions) are provided under that license, as detailed below. |
| * It has subsequently been modified by contributors to the QEMU Project, |
| * so some portions are provided under: |
| * the SoftFloat-2a license |
| * the BSD license |
| * GPL-v2-or-later |
| * |
| * Any future contributions to this file after December 1st 2014 will be |
| * taken to be licensed under the Softfloat-2a license unless specifically |
| * indicated otherwise. |
| */ |
| |
| static void partsN(return_nan)(FloatPartsN *a, float_status *s) |
| { |
| switch (a->cls) { |
| case float_class_snan: |
| float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
| if (s->default_nan_mode) { |
| parts_default_nan(a, s); |
| } else { |
| parts_silence_nan(a, s); |
| } |
| break; |
| case float_class_qnan: |
| if (s->default_nan_mode) { |
| parts_default_nan(a, s); |
| } |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s) |
| { |
| if (is_snan(a->cls) || is_snan(b->cls)) { |
| float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
| } |
| |
| if (s->default_nan_mode) { |
| parts_default_nan(a, s); |
| } else { |
| int cmp = frac_cmp(a, b); |
| if (cmp == 0) { |
| cmp = a->sign < b->sign; |
| } |
| |
| if (pickNaN(a->cls, b->cls, cmp > 0, s)) { |
| a = b; |
| } |
| if (is_snan(a->cls)) { |
| parts_silence_nan(a, s); |
| } |
| } |
| return a; |
| } |
| |
| static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b, |
| FloatPartsN *c, float_status *s, |
| int ab_mask, int abc_mask) |
| { |
| int which; |
| |
| if (unlikely(abc_mask & float_cmask_snan)) { |
| float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
| } |
| |
| which = pickNaNMulAdd(a->cls, b->cls, c->cls, |
| ab_mask == float_cmask_infzero, s); |
| |
| if (s->default_nan_mode || which == 3) { |
| /* |
| * Note that this check is after pickNaNMulAdd so that function |
| * has an opportunity to set the Invalid flag for infzero. |
| */ |
| parts_default_nan(a, s); |
| return a; |
| } |
| |
| switch (which) { |
| case 0: |
| break; |
| case 1: |
| a = b; |
| break; |
| case 2: |
| a = c; |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| if (is_snan(a->cls)) { |
| parts_silence_nan(a, s); |
| } |
| return a; |
| } |
| |
| /* |
| * Canonicalize the FloatParts structure. Determine the class, |
| * unbias the exponent, and normalize the fraction. |
| */ |
| static void partsN(canonicalize)(FloatPartsN *p, float_status *status, |
| const FloatFmt *fmt) |
| { |
| if (unlikely(p->exp == 0)) { |
| if (likely(frac_eqz(p))) { |
| p->cls = float_class_zero; |
| } else if (status->flush_inputs_to_zero) { |
| float_raise(float_flag_input_denormal, status); |
| p->cls = float_class_zero; |
| frac_clear(p); |
| } else { |
| int shift = frac_normalize(p); |
| p->cls = float_class_normal; |
| p->exp = fmt->frac_shift - fmt->exp_bias |
| - shift + !fmt->m68k_denormal; |
| } |
| } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) { |
| p->cls = float_class_normal; |
| p->exp -= fmt->exp_bias; |
| frac_shl(p, fmt->frac_shift); |
| p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; |
| } else if (likely(frac_eqz(p))) { |
| p->cls = float_class_inf; |
| } else { |
| frac_shl(p, fmt->frac_shift); |
| p->cls = (parts_is_snan_frac(p->frac_hi, status) |
| ? float_class_snan : float_class_qnan); |
| } |
| } |
| |
| /* |
| * Round and uncanonicalize a floating-point number by parts. There |
| * are FRAC_SHIFT bits that may require rounding at the bottom of the |
| * fraction; these bits will be removed. The exponent will be biased |
| * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0]. |
| */ |
| static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s, |
| const FloatFmt *fmt) |
| { |
| const int exp_max = fmt->exp_max; |
| const int frac_shift = fmt->frac_shift; |
| const uint64_t round_mask = fmt->round_mask; |
| const uint64_t frac_lsb = round_mask + 1; |
| const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); |
| const uint64_t roundeven_mask = round_mask | frac_lsb; |
| uint64_t inc; |
| bool overflow_norm = false; |
| int exp, flags = 0; |
| |
| switch (s->float_rounding_mode) { |
| case float_round_nearest_even: |
| if (N > 64 && frac_lsb == 0) { |
| inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1 |
| ? frac_lsbm1 : 0); |
| } else { |
| inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 |
| ? frac_lsbm1 : 0); |
| } |
| break; |
| case float_round_ties_away: |
| inc = frac_lsbm1; |
| break; |
| case float_round_to_zero: |
| overflow_norm = true; |
| inc = 0; |
| break; |
| case float_round_up: |
| inc = p->sign ? 0 : round_mask; |
| overflow_norm = p->sign; |
| break; |
| case float_round_down: |
| inc = p->sign ? round_mask : 0; |
| overflow_norm = !p->sign; |
| break; |
| case float_round_to_odd: |
| overflow_norm = true; |
| /* fall through */ |
| case float_round_to_odd_inf: |
| if (N > 64 && frac_lsb == 0) { |
| inc = p->frac_hi & 1 ? 0 : round_mask; |
| } else { |
| inc = p->frac_lo & frac_lsb ? 0 : round_mask; |
| } |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| |
| exp = p->exp + fmt->exp_bias; |
| if (likely(exp > 0)) { |
| if (p->frac_lo & round_mask) { |
| flags |= float_flag_inexact; |
| if (frac_addi(p, p, inc)) { |
| frac_shr(p, 1); |
| p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; |
| exp++; |
| } |
| p->frac_lo &= ~round_mask; |
| } |
| |
| if (fmt->arm_althp) { |
| /* ARM Alt HP eschews Inf and NaN for a wider exponent. */ |
| if (unlikely(exp > exp_max)) { |
| /* Overflow. Return the maximum normal. */ |
| flags = float_flag_invalid; |
| exp = exp_max; |
| frac_allones(p); |
| p->frac_lo &= ~round_mask; |
| } |
| } else if (unlikely(exp >= exp_max)) { |
| flags |= float_flag_overflow; |
| if (s->rebias_overflow) { |
| exp -= fmt->exp_re_bias; |
| } else if (overflow_norm) { |
| flags |= float_flag_inexact; |
| exp = exp_max - 1; |
| frac_allones(p); |
| p->frac_lo &= ~round_mask; |
| } else { |
| flags |= float_flag_inexact; |
| p->cls = float_class_inf; |
| exp = exp_max; |
| frac_clear(p); |
| } |
| } |
| frac_shr(p, frac_shift); |
| } else if (unlikely(s->rebias_underflow)) { |
| flags |= float_flag_underflow; |
| exp += fmt->exp_re_bias; |
| if (p->frac_lo & round_mask) { |
| flags |= float_flag_inexact; |
| if (frac_addi(p, p, inc)) { |
| frac_shr(p, 1); |
| p->frac_hi |= DECOMPOSED_IMPLICIT_BIT; |
| exp++; |
| } |
| p->frac_lo &= ~round_mask; |
| } |
| frac_shr(p, frac_shift); |
| } else if (s->flush_to_zero) { |
| flags |= float_flag_output_denormal; |
| p->cls = float_class_zero; |
| exp = 0; |
| frac_clear(p); |
| } else { |
| bool is_tiny = s->tininess_before_rounding || exp < 0; |
| |
| if (!is_tiny) { |
| FloatPartsN discard; |
| is_tiny = !frac_addi(&discard, p, inc); |
| } |
| |
| frac_shrjam(p, !fmt->m68k_denormal - exp); |
| |
| if (p->frac_lo & round_mask) { |
| /* Need to recompute round-to-even/round-to-odd. */ |
| switch (s->float_rounding_mode) { |
| case float_round_nearest_even: |
| if (N > 64 && frac_lsb == 0) { |
| inc = ((p->frac_hi & 1) || |
| (p->frac_lo & round_mask) != frac_lsbm1 |
| ? frac_lsbm1 : 0); |
| } else { |
| inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 |
| ? frac_lsbm1 : 0); |
| } |
| break; |
| case float_round_to_odd: |
| case float_round_to_odd_inf: |
| if (N > 64 && frac_lsb == 0) { |
| inc = p->frac_hi & 1 ? 0 : round_mask; |
| } else { |
| inc = p->frac_lo & frac_lsb ? 0 : round_mask; |
| } |
| break; |
| default: |
| break; |
| } |
| flags |= float_flag_inexact; |
| frac_addi(p, p, inc); |
| p->frac_lo &= ~round_mask; |
| } |
| |
| exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal; |
| frac_shr(p, frac_shift); |
| |
| if (is_tiny && (flags & float_flag_inexact)) { |
| flags |= float_flag_underflow; |
| } |
| if (exp == 0 && frac_eqz(p)) { |
| p->cls = float_class_zero; |
| } |
| } |
| p->exp = exp; |
| float_raise(flags, s); |
| } |
| |
| static void partsN(uncanon)(FloatPartsN *p, float_status *s, |
| const FloatFmt *fmt) |
| { |
| if (likely(p->cls == float_class_normal)) { |
| parts_uncanon_normal(p, s, fmt); |
| } else { |
| switch (p->cls) { |
| case float_class_zero: |
| p->exp = 0; |
| frac_clear(p); |
| return; |
| case float_class_inf: |
| g_assert(!fmt->arm_althp); |
| p->exp = fmt->exp_max; |
| frac_clear(p); |
| return; |
| case float_class_qnan: |
| case float_class_snan: |
| g_assert(!fmt->arm_althp); |
| p->exp = fmt->exp_max; |
| frac_shr(p, fmt->frac_shift); |
| return; |
| default: |
| break; |
| } |
| g_assert_not_reached(); |
| } |
| } |
| |
| /* |
| * Returns the result of adding or subtracting the values of the |
| * floating-point values `a' and `b'. The operation is performed |
| * according to the IEC/IEEE Standard for Binary Floating-Point |
| * Arithmetic. |
| */ |
| static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s, bool subtract) |
| { |
| bool b_sign = b->sign ^ subtract; |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| |
| if (a->sign != b_sign) { |
| /* Subtraction */ |
| if (likely(ab_mask == float_cmask_normal)) { |
| if (parts_sub_normal(a, b)) { |
| return a; |
| } |
| /* Subtract was exact, fall through to set sign. */ |
| ab_mask = float_cmask_zero; |
| } |
| |
| if (ab_mask == float_cmask_zero) { |
| a->sign = s->float_rounding_mode == float_round_down; |
| return a; |
| } |
| |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| goto p_nan; |
| } |
| |
| if (ab_mask & float_cmask_inf) { |
| if (a->cls != float_class_inf) { |
| /* N - Inf */ |
| goto return_b; |
| } |
| if (b->cls != float_class_inf) { |
| /* Inf - N */ |
| return a; |
| } |
| /* Inf - Inf */ |
| float_raise(float_flag_invalid | float_flag_invalid_isi, s); |
| parts_default_nan(a, s); |
| return a; |
| } |
| } else { |
| /* Addition */ |
| if (likely(ab_mask == float_cmask_normal)) { |
| parts_add_normal(a, b); |
| return a; |
| } |
| |
| if (ab_mask == float_cmask_zero) { |
| return a; |
| } |
| |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| goto p_nan; |
| } |
| |
| if (ab_mask & float_cmask_inf) { |
| a->cls = float_class_inf; |
| return a; |
| } |
| } |
| |
| if (b->cls == float_class_zero) { |
| g_assert(a->cls == float_class_normal); |
| return a; |
| } |
| |
| g_assert(a->cls == float_class_zero); |
| g_assert(b->cls == float_class_normal); |
| return_b: |
| b->sign = b_sign; |
| return b; |
| |
| p_nan: |
| return parts_pick_nan(a, b, s); |
| } |
| |
| /* |
| * Returns the result of multiplying the floating-point values `a' and |
| * `b'. The operation is performed according to the IEC/IEEE Standard |
| * for Binary Floating-Point Arithmetic. |
| */ |
| static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s) |
| { |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| bool sign = a->sign ^ b->sign; |
| |
| if (likely(ab_mask == float_cmask_normal)) { |
| FloatPartsW tmp; |
| |
| frac_mulw(&tmp, a, b); |
| frac_truncjam(a, &tmp); |
| |
| a->exp += b->exp + 1; |
| if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { |
| frac_add(a, a, a); |
| a->exp -= 1; |
| } |
| |
| a->sign = sign; |
| return a; |
| } |
| |
| /* Inf * Zero == NaN */ |
| if (unlikely(ab_mask == float_cmask_infzero)) { |
| float_raise(float_flag_invalid | float_flag_invalid_imz, s); |
| parts_default_nan(a, s); |
| return a; |
| } |
| |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| return parts_pick_nan(a, b, s); |
| } |
| |
| /* Multiply by 0 or Inf */ |
| if (ab_mask & float_cmask_inf) { |
| a->cls = float_class_inf; |
| a->sign = sign; |
| return a; |
| } |
| |
| g_assert(ab_mask & float_cmask_zero); |
| a->cls = float_class_zero; |
| a->sign = sign; |
| return a; |
| } |
| |
| /* |
| * Returns the result of multiplying the floating-point values `a' and |
| * `b' then adding 'c', with no intermediate rounding step after the |
| * multiplication. The operation is performed according to the |
| * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008. |
| * The flags argument allows the caller to select negation of the |
| * addend, the intermediate product, or the final result. (The |
| * difference between this and having the caller do a separate |
| * negation is that negating externally will flip the sign bit on NaNs.) |
| * |
| * Requires A and C extracted into a double-sized structure to provide the |
| * extra space for the widening multiply. |
| */ |
| static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b, |
| FloatPartsN *c, int flags, float_status *s) |
| { |
| int ab_mask, abc_mask; |
| FloatPartsW p_widen, c_widen; |
| |
| ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| abc_mask = float_cmask(c->cls) | ab_mask; |
| |
| /* |
| * It is implementation-defined whether the cases of (0,inf,qnan) |
| * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN |
| * they return if they do), so we have to hand this information |
| * off to the target-specific pick-a-NaN routine. |
| */ |
| if (unlikely(abc_mask & float_cmask_anynan)) { |
| return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask); |
| } |
| |
| if (flags & float_muladd_negate_c) { |
| c->sign ^= 1; |
| } |
| |
| /* Compute the sign of the product into A. */ |
| a->sign ^= b->sign; |
| if (flags & float_muladd_negate_product) { |
| a->sign ^= 1; |
| } |
| |
| if (unlikely(ab_mask != float_cmask_normal)) { |
| if (unlikely(ab_mask == float_cmask_infzero)) { |
| float_raise(float_flag_invalid | float_flag_invalid_imz, s); |
| goto d_nan; |
| } |
| |
| if (ab_mask & float_cmask_inf) { |
| if (c->cls == float_class_inf && a->sign != c->sign) { |
| float_raise(float_flag_invalid | float_flag_invalid_isi, s); |
| goto d_nan; |
| } |
| goto return_inf; |
| } |
| |
| g_assert(ab_mask & float_cmask_zero); |
| if (c->cls == float_class_normal) { |
| *a = *c; |
| goto return_normal; |
| } |
| if (c->cls == float_class_zero) { |
| if (a->sign != c->sign) { |
| goto return_sub_zero; |
| } |
| goto return_zero; |
| } |
| g_assert(c->cls == float_class_inf); |
| } |
| |
| if (unlikely(c->cls == float_class_inf)) { |
| a->sign = c->sign; |
| goto return_inf; |
| } |
| |
| /* Perform the multiplication step. */ |
| p_widen.sign = a->sign; |
| p_widen.exp = a->exp + b->exp + 1; |
| frac_mulw(&p_widen, a, b); |
| if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) { |
| frac_add(&p_widen, &p_widen, &p_widen); |
| p_widen.exp -= 1; |
| } |
| |
| /* Perform the addition step. */ |
| if (c->cls != float_class_zero) { |
| /* Zero-extend C to less significant bits. */ |
| frac_widen(&c_widen, c); |
| c_widen.exp = c->exp; |
| |
| if (a->sign == c->sign) { |
| parts_add_normal(&p_widen, &c_widen); |
| } else if (!parts_sub_normal(&p_widen, &c_widen)) { |
| goto return_sub_zero; |
| } |
| } |
| |
| /* Narrow with sticky bit, for proper rounding later. */ |
| frac_truncjam(a, &p_widen); |
| a->sign = p_widen.sign; |
| a->exp = p_widen.exp; |
| |
| return_normal: |
| if (flags & float_muladd_halve_result) { |
| a->exp -= 1; |
| } |
| finish_sign: |
| if (flags & float_muladd_negate_result) { |
| a->sign ^= 1; |
| } |
| return a; |
| |
| return_sub_zero: |
| a->sign = s->float_rounding_mode == float_round_down; |
| return_zero: |
| a->cls = float_class_zero; |
| goto finish_sign; |
| |
| return_inf: |
| a->cls = float_class_inf; |
| goto finish_sign; |
| |
| d_nan: |
| parts_default_nan(a, s); |
| return a; |
| } |
| |
| /* |
| * Returns the result of dividing the floating-point value `a' by the |
| * corresponding value `b'. The operation is performed according to |
| * the IEC/IEEE Standard for Binary Floating-Point Arithmetic. |
| */ |
| static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s) |
| { |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| bool sign = a->sign ^ b->sign; |
| |
| if (likely(ab_mask == float_cmask_normal)) { |
| a->sign = sign; |
| a->exp -= b->exp + frac_div(a, b); |
| return a; |
| } |
| |
| /* 0/0 or Inf/Inf => NaN */ |
| if (unlikely(ab_mask == float_cmask_zero)) { |
| float_raise(float_flag_invalid | float_flag_invalid_zdz, s); |
| goto d_nan; |
| } |
| if (unlikely(ab_mask == float_cmask_inf)) { |
| float_raise(float_flag_invalid | float_flag_invalid_idi, s); |
| goto d_nan; |
| } |
| |
| /* All the NaN cases */ |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| return parts_pick_nan(a, b, s); |
| } |
| |
| a->sign = sign; |
| |
| /* Inf / X */ |
| if (a->cls == float_class_inf) { |
| return a; |
| } |
| |
| /* 0 / X */ |
| if (a->cls == float_class_zero) { |
| return a; |
| } |
| |
| /* X / Inf */ |
| if (b->cls == float_class_inf) { |
| a->cls = float_class_zero; |
| return a; |
| } |
| |
| /* X / 0 => Inf */ |
| g_assert(b->cls == float_class_zero); |
| float_raise(float_flag_divbyzero, s); |
| a->cls = float_class_inf; |
| return a; |
| |
| d_nan: |
| parts_default_nan(a, s); |
| return a; |
| } |
| |
| /* |
| * Floating point remainder, per IEC/IEEE, or modulus. |
| */ |
| static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b, |
| uint64_t *mod_quot, float_status *s) |
| { |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| |
| if (likely(ab_mask == float_cmask_normal)) { |
| frac_modrem(a, b, mod_quot); |
| return a; |
| } |
| |
| if (mod_quot) { |
| *mod_quot = 0; |
| } |
| |
| /* All the NaN cases */ |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| return parts_pick_nan(a, b, s); |
| } |
| |
| /* Inf % N; N % 0 */ |
| if (a->cls == float_class_inf || b->cls == float_class_zero) { |
| float_raise(float_flag_invalid, s); |
| parts_default_nan(a, s); |
| return a; |
| } |
| |
| /* N % Inf; 0 % N */ |
| g_assert(b->cls == float_class_inf || a->cls == float_class_zero); |
| return a; |
| } |
| |
| /* |
| * Square Root |
| * |
| * The base algorithm is lifted from |
| * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c |
| * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c |
| * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c |
| * and is thus MIT licenced. |
| */ |
| static void partsN(sqrt)(FloatPartsN *a, float_status *status, |
| const FloatFmt *fmt) |
| { |
| const uint32_t three32 = 3u << 30; |
| const uint64_t three64 = 3ull << 62; |
| uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */ |
| uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */ |
| uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */ |
| uint64_t d0h, d0l, d1h, d1l, d2h, d2l; |
| uint64_t discard; |
| bool exp_odd; |
| size_t index; |
| |
| if (unlikely(a->cls != float_class_normal)) { |
| switch (a->cls) { |
| case float_class_snan: |
| case float_class_qnan: |
| parts_return_nan(a, status); |
| return; |
| case float_class_zero: |
| return; |
| case float_class_inf: |
| if (unlikely(a->sign)) { |
| goto d_nan; |
| } |
| return; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| if (unlikely(a->sign)) { |
| goto d_nan; |
| } |
| |
| /* |
| * Argument reduction. |
| * x = 4^e frac; with integer e, and frac in [1, 4) |
| * m = frac fixed point at bit 62, since we're in base 4. |
| * If base-2 exponent is odd, exchange that for multiply by 2, |
| * which results in no shift. |
| */ |
| exp_odd = a->exp & 1; |
| index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6); |
| if (!exp_odd) { |
| frac_shr(a, 1); |
| } |
| |
| /* |
| * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4). |
| * |
| * Initial estimate: |
| * 7-bit lookup table (1-bit exponent and 6-bit significand). |
| * |
| * The relative error (e = r0*sqrt(m)-1) of a linear estimate |
| * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best; |
| * a table lookup is faster and needs one less iteration. |
| * The 7-bit table gives |e| < 0x1.fdp-9. |
| * |
| * A Newton-Raphson iteration for r is |
| * s = m*r |
| * d = s*r |
| * u = 3 - d |
| * r = r*u/2 |
| * |
| * Fixed point representations: |
| * m, s, d, u, three are all 2.30; r is 0.32 |
| */ |
| m64 = a->frac_hi; |
| m32 = m64 >> 32; |
| |
| r32 = rsqrt_tab[index] << 16; |
| /* |r*sqrt(m) - 1| < 0x1.FDp-9 */ |
| |
| s32 = ((uint64_t)m32 * r32) >> 32; |
| d32 = ((uint64_t)s32 * r32) >> 32; |
| u32 = three32 - d32; |
| |
| if (N == 64) { |
| /* float64 or smaller */ |
| |
| r32 = ((uint64_t)r32 * u32) >> 31; |
| /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */ |
| |
| s32 = ((uint64_t)m32 * r32) >> 32; |
| d32 = ((uint64_t)s32 * r32) >> 32; |
| u32 = three32 - d32; |
| |
| if (fmt->frac_size <= 23) { |
| /* float32 or smaller */ |
| |
| s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */ |
| s32 = (s32 - 1) >> 6; /* 9.23 */ |
| /* s < sqrt(m) < s + 0x1.08p-23 */ |
| |
| /* compute nearest rounded result to 2.23 bits */ |
| uint32_t d0 = (m32 << 16) - s32 * s32; |
| uint32_t d1 = s32 - d0; |
| uint32_t d2 = d1 + s32 + 1; |
| s32 += d1 >> 31; |
| a->frac_hi = (uint64_t)s32 << (64 - 25); |
| |
| /* increment or decrement for inexact */ |
| if (d2 != 0) { |
| a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1); |
| } |
| goto done; |
| } |
| |
| /* float64 */ |
| |
| r64 = (uint64_t)r32 * u32 * 2; |
| /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */ |
| mul64To128(m64, r64, &s64, &discard); |
| mul64To128(s64, r64, &d64, &discard); |
| u64 = three64 - d64; |
| |
| mul64To128(s64, u64, &s64, &discard); /* 3.61 */ |
| s64 = (s64 - 2) >> 9; /* 12.52 */ |
| |
| /* Compute nearest rounded result */ |
| uint64_t d0 = (m64 << 42) - s64 * s64; |
| uint64_t d1 = s64 - d0; |
| uint64_t d2 = d1 + s64 + 1; |
| s64 += d1 >> 63; |
| a->frac_hi = s64 << (64 - 54); |
| |
| /* increment or decrement for inexact */ |
| if (d2 != 0) { |
| a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1); |
| } |
| goto done; |
| } |
| |
| r64 = (uint64_t)r32 * u32 * 2; |
| /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */ |
| |
| mul64To128(m64, r64, &s64, &discard); |
| mul64To128(s64, r64, &d64, &discard); |
| u64 = three64 - d64; |
| mul64To128(u64, r64, &r64, &discard); |
| r64 <<= 1; |
| /* |r*sqrt(m) - 1| < 0x1.a5p-31 */ |
| |
| mul64To128(m64, r64, &s64, &discard); |
| mul64To128(s64, r64, &d64, &discard); |
| u64 = three64 - d64; |
| mul64To128(u64, r64, &rh, &rl); |
| add128(rh, rl, rh, rl, &rh, &rl); |
| /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */ |
| |
| mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard); |
| mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard); |
| sub128(three64, 0, dh, dl, &uh, &ul); |
| mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */ |
| /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */ |
| |
| sub128(sh, sl, 0, 4, &sh, &sl); |
| shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */ |
| /* s < sqrt(m) < s + 1ulp */ |
| |
| /* Compute nearest rounded result */ |
| mul64To128(sl, sl, &d0h, &d0l); |
| d0h += 2 * sh * sl; |
| sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l); |
| sub128(sh, sl, d0h, d0l, &d1h, &d1l); |
| add128(sh, sl, 0, 1, &d2h, &d2l); |
| add128(d2h, d2l, d1h, d1l, &d2h, &d2l); |
| add128(sh, sl, 0, d1h >> 63, &sh, &sl); |
| shift128Left(sh, sl, 128 - 114, &sh, &sl); |
| |
| /* increment or decrement for inexact */ |
| if (d2h | d2l) { |
| if ((int64_t)(d1h ^ d2h) < 0) { |
| sub128(sh, sl, 0, 1, &sh, &sl); |
| } else { |
| add128(sh, sl, 0, 1, &sh, &sl); |
| } |
| } |
| a->frac_lo = sl; |
| a->frac_hi = sh; |
| |
| done: |
| /* Convert back from base 4 to base 2. */ |
| a->exp >>= 1; |
| if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) { |
| frac_add(a, a, a); |
| } else { |
| a->exp += 1; |
| } |
| return; |
| |
| d_nan: |
| float_raise(float_flag_invalid | float_flag_invalid_sqrt, status); |
| parts_default_nan(a, status); |
| } |
| |
| /* |
| * Rounds the floating-point value `a' to an integer, and returns the |
| * result as a floating-point value. The operation is performed |
| * according to the IEC/IEEE Standard for Binary Floating-Point |
| * Arithmetic. |
| * |
| * parts_round_to_int_normal is an internal helper function for |
| * normal numbers only, returning true for inexact but not directly |
| * raising float_flag_inexact. |
| */ |
| static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode, |
| int scale, int frac_size) |
| { |
| uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc; |
| int shift_adj; |
| |
| scale = MIN(MAX(scale, -0x10000), 0x10000); |
| a->exp += scale; |
| |
| if (a->exp < 0) { |
| bool one; |
| |
| /* All fractional */ |
| switch (rmode) { |
| case float_round_nearest_even: |
| one = false; |
| if (a->exp == -1) { |
| FloatPartsN tmp; |
| /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */ |
| frac_add(&tmp, a, a); |
| /* Anything remaining means frac > 0.5. */ |
| one = !frac_eqz(&tmp); |
| } |
| break; |
| case float_round_ties_away: |
| one = a->exp == -1; |
| break; |
| case float_round_to_zero: |
| one = false; |
| break; |
| case float_round_up: |
| one = !a->sign; |
| break; |
| case float_round_down: |
| one = a->sign; |
| break; |
| case float_round_to_odd: |
| one = true; |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| |
| frac_clear(a); |
| a->exp = 0; |
| if (one) { |
| a->frac_hi = DECOMPOSED_IMPLICIT_BIT; |
| } else { |
| a->cls = float_class_zero; |
| } |
| return true; |
| } |
| |
| if (a->exp >= frac_size) { |
| /* All integral */ |
| return false; |
| } |
| |
| if (N > 64 && a->exp < N - 64) { |
| /* |
| * Rounding is not in the low word -- shift lsb to bit 2, |
| * which leaves room for sticky and rounding bit. |
| */ |
| shift_adj = (N - 1) - (a->exp + 2); |
| frac_shrjam(a, shift_adj); |
| frac_lsb = 1 << 2; |
| } else { |
| shift_adj = 0; |
| frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63); |
| } |
| |
| frac_lsbm1 = frac_lsb >> 1; |
| rnd_mask = frac_lsb - 1; |
| rnd_even_mask = rnd_mask | frac_lsb; |
| |
| if (!(a->frac_lo & rnd_mask)) { |
| /* Fractional bits already clear, undo the shift above. */ |
| frac_shl(a, shift_adj); |
| return false; |
| } |
| |
| switch (rmode) { |
| case float_round_nearest_even: |
| inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0); |
| break; |
| case float_round_ties_away: |
| inc = frac_lsbm1; |
| break; |
| case float_round_to_zero: |
| inc = 0; |
| break; |
| case float_round_up: |
| inc = a->sign ? 0 : rnd_mask; |
| break; |
| case float_round_down: |
| inc = a->sign ? rnd_mask : 0; |
| break; |
| case float_round_to_odd: |
| inc = a->frac_lo & frac_lsb ? 0 : rnd_mask; |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| |
| if (shift_adj == 0) { |
| if (frac_addi(a, a, inc)) { |
| frac_shr(a, 1); |
| a->frac_hi |= DECOMPOSED_IMPLICIT_BIT; |
| a->exp++; |
| } |
| a->frac_lo &= ~rnd_mask; |
| } else { |
| frac_addi(a, a, inc); |
| a->frac_lo &= ~rnd_mask; |
| /* Be careful shifting back, not to overflow */ |
| frac_shl(a, shift_adj - 1); |
| if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) { |
| a->exp++; |
| } else { |
| frac_add(a, a, a); |
| } |
| } |
| return true; |
| } |
| |
| static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode, |
| int scale, float_status *s, |
| const FloatFmt *fmt) |
| { |
| switch (a->cls) { |
| case float_class_qnan: |
| case float_class_snan: |
| parts_return_nan(a, s); |
| break; |
| case float_class_zero: |
| case float_class_inf: |
| break; |
| case float_class_normal: |
| if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) { |
| float_raise(float_flag_inexact, s); |
| } |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| /* |
| * Returns the result of converting the floating-point value `a' to |
| * the two's complement integer format. The conversion is performed |
| * according to the IEC/IEEE Standard for Binary Floating-Point |
| * Arithmetic---which means in particular that the conversion is |
| * rounded according to the current rounding mode. If `a' is a NaN, |
| * the largest positive integer is returned. Otherwise, if the |
| * conversion overflows, the largest integer with the same sign as `a' |
| * is returned. |
| */ |
| static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode, |
| int scale, int64_t min, int64_t max, |
| float_status *s) |
| { |
| int flags = 0; |
| uint64_t r; |
| |
| switch (p->cls) { |
| case float_class_snan: |
| flags |= float_flag_invalid_snan; |
| /* fall through */ |
| case float_class_qnan: |
| flags |= float_flag_invalid; |
| r = max; |
| break; |
| |
| case float_class_inf: |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = p->sign ? min : max; |
| break; |
| |
| case float_class_zero: |
| return 0; |
| |
| case float_class_normal: |
| /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ |
| if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { |
| flags = float_flag_inexact; |
| } |
| |
| if (p->exp <= DECOMPOSED_BINARY_POINT) { |
| r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); |
| } else { |
| r = UINT64_MAX; |
| } |
| if (p->sign) { |
| if (r <= -(uint64_t)min) { |
| r = -r; |
| } else { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = min; |
| } |
| } else if (r > max) { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = max; |
| } |
| break; |
| |
| default: |
| g_assert_not_reached(); |
| } |
| |
| float_raise(flags, s); |
| return r; |
| } |
| |
| /* |
| * Returns the result of converting the floating-point value `a' to |
| * the unsigned integer format. The conversion is performed according |
| * to the IEC/IEEE Standard for Binary Floating-Point |
| * Arithmetic---which means in particular that the conversion is |
| * rounded according to the current rounding mode. If `a' is a NaN, |
| * the largest unsigned integer is returned. Otherwise, if the |
| * conversion overflows, the largest unsigned integer is returned. If |
| * the 'a' is negative, the result is rounded and zero is returned; |
| * values that do not round to zero will raise the inexact exception |
| * flag. |
| */ |
| static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode, |
| int scale, uint64_t max, float_status *s) |
| { |
| int flags = 0; |
| uint64_t r; |
| |
| switch (p->cls) { |
| case float_class_snan: |
| flags |= float_flag_invalid_snan; |
| /* fall through */ |
| case float_class_qnan: |
| flags |= float_flag_invalid; |
| r = max; |
| break; |
| |
| case float_class_inf: |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = p->sign ? 0 : max; |
| break; |
| |
| case float_class_zero: |
| return 0; |
| |
| case float_class_normal: |
| /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ |
| if (parts_round_to_int_normal(p, rmode, scale, N - 2)) { |
| flags = float_flag_inexact; |
| if (p->cls == float_class_zero) { |
| r = 0; |
| break; |
| } |
| } |
| |
| if (p->sign) { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = 0; |
| } else if (p->exp > DECOMPOSED_BINARY_POINT) { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = max; |
| } else { |
| r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); |
| if (r > max) { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| r = max; |
| } |
| } |
| break; |
| |
| default: |
| g_assert_not_reached(); |
| } |
| |
| float_raise(flags, s); |
| return r; |
| } |
| |
| /* |
| * Like partsN(float_to_sint), except do not saturate the result. |
| * Instead, return the rounded unbounded precision two's compliment result, |
| * modulo 2**(bitsm1 + 1). |
| */ |
| static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p, |
| FloatRoundMode rmode, |
| int bitsm1, float_status *s) |
| { |
| int flags = 0; |
| uint64_t r; |
| bool overflow = false; |
| |
| switch (p->cls) { |
| case float_class_snan: |
| flags |= float_flag_invalid_snan; |
| /* fall through */ |
| case float_class_qnan: |
| flags |= float_flag_invalid; |
| r = 0; |
| break; |
| |
| case float_class_inf: |
| overflow = true; |
| r = 0; |
| break; |
| |
| case float_class_zero: |
| return 0; |
| |
| case float_class_normal: |
| /* TODO: N - 2 is frac_size for rounding; could use input fmt. */ |
| if (parts_round_to_int_normal(p, rmode, 0, N - 2)) { |
| flags = float_flag_inexact; |
| } |
| |
| if (p->exp <= DECOMPOSED_BINARY_POINT) { |
| /* |
| * Because we rounded to integral, and exp < 64, |
| * we know frac_low is zero. |
| */ |
| r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp); |
| if (p->exp < bitsm1) { |
| /* Result in range. */ |
| } else if (p->exp == bitsm1) { |
| /* The only in-range value is INT_MIN. */ |
| overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT; |
| } else { |
| overflow = true; |
| } |
| } else { |
| /* Overflow, but there might still be bits to return. */ |
| int shl = p->exp - DECOMPOSED_BINARY_POINT; |
| if (shl < N) { |
| frac_shl(p, shl); |
| r = p->frac_hi; |
| } else { |
| r = 0; |
| } |
| overflow = true; |
| } |
| |
| if (p->sign) { |
| r = -r; |
| } |
| break; |
| |
| default: |
| g_assert_not_reached(); |
| } |
| |
| if (overflow) { |
| flags = float_flag_invalid | float_flag_invalid_cvti; |
| } |
| float_raise(flags, s); |
| return r; |
| } |
| |
| /* |
| * Integer to float conversions |
| * |
| * Returns the result of converting the two's complement integer `a' |
| * to the floating-point format. The conversion is performed according |
| * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. |
| */ |
| static void partsN(sint_to_float)(FloatPartsN *p, int64_t a, |
| int scale, float_status *s) |
| { |
| uint64_t f = a; |
| int shift; |
| |
| memset(p, 0, sizeof(*p)); |
| |
| if (a == 0) { |
| p->cls = float_class_zero; |
| return; |
| } |
| |
| p->cls = float_class_normal; |
| if (a < 0) { |
| f = -f; |
| p->sign = true; |
| } |
| shift = clz64(f); |
| scale = MIN(MAX(scale, -0x10000), 0x10000); |
| |
| p->exp = DECOMPOSED_BINARY_POINT - shift + scale; |
| p->frac_hi = f << shift; |
| } |
| |
| /* |
| * Unsigned Integer to float conversions |
| * |
| * Returns the result of converting the unsigned integer `a' to the |
| * floating-point format. The conversion is performed according to the |
| * IEC/IEEE Standard for Binary Floating-Point Arithmetic. |
| */ |
| static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a, |
| int scale, float_status *status) |
| { |
| memset(p, 0, sizeof(*p)); |
| |
| if (a == 0) { |
| p->cls = float_class_zero; |
| } else { |
| int shift = clz64(a); |
| scale = MIN(MAX(scale, -0x10000), 0x10000); |
| p->cls = float_class_normal; |
| p->exp = DECOMPOSED_BINARY_POINT - shift + scale; |
| p->frac_hi = a << shift; |
| } |
| } |
| |
| /* |
| * Float min/max. |
| */ |
| static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s, int flags) |
| { |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| int a_exp, b_exp, cmp; |
| |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| /* |
| * For minNum/maxNum (IEEE 754-2008) |
| * or minimumNumber/maximumNumber (IEEE 754-2019), |
| * if one operand is a QNaN, and the other |
| * operand is numerical, then return numerical argument. |
| */ |
| if ((flags & (minmax_isnum | minmax_isnumber)) |
| && !(ab_mask & float_cmask_snan) |
| && (ab_mask & ~float_cmask_qnan)) { |
| return is_nan(a->cls) ? b : a; |
| } |
| |
| /* |
| * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag |
| * are removed and replaced with minimum, minimumNumber, maximum |
| * and maximumNumber. |
| * minimumNumber/maximumNumber behavior for SNaN is changed to: |
| * If both operands are NaNs, a QNaN is returned. |
| * If either operand is a SNaN, |
| * an invalid operation exception is signaled, |
| * but unless both operands are NaNs, |
| * the SNaN is otherwise ignored and not converted to a QNaN. |
| */ |
| if ((flags & minmax_isnumber) |
| && (ab_mask & float_cmask_snan) |
| && (ab_mask & ~float_cmask_anynan)) { |
| float_raise(float_flag_invalid, s); |
| return is_nan(a->cls) ? b : a; |
| } |
| |
| return parts_pick_nan(a, b, s); |
| } |
| |
| a_exp = a->exp; |
| b_exp = b->exp; |
| |
| if (unlikely(ab_mask != float_cmask_normal)) { |
| switch (a->cls) { |
| case float_class_normal: |
| break; |
| case float_class_inf: |
| a_exp = INT16_MAX; |
| break; |
| case float_class_zero: |
| a_exp = INT16_MIN; |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| switch (b->cls) { |
| case float_class_normal: |
| break; |
| case float_class_inf: |
| b_exp = INT16_MAX; |
| break; |
| case float_class_zero: |
| b_exp = INT16_MIN; |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| /* Compare magnitudes. */ |
| cmp = a_exp - b_exp; |
| if (cmp == 0) { |
| cmp = frac_cmp(a, b); |
| } |
| |
| /* |
| * Take the sign into account. |
| * For ismag, only do this if the magnitudes are equal. |
| */ |
| if (!(flags & minmax_ismag) || cmp == 0) { |
| if (a->sign != b->sign) { |
| /* For differing signs, the negative operand is less. */ |
| cmp = a->sign ? -1 : 1; |
| } else if (a->sign) { |
| /* For two negative operands, invert the magnitude comparison. */ |
| cmp = -cmp; |
| } |
| } |
| |
| if (flags & minmax_ismin) { |
| cmp = -cmp; |
| } |
| return cmp < 0 ? b : a; |
| } |
| |
| /* |
| * Floating point compare |
| */ |
| static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b, |
| float_status *s, bool is_quiet) |
| { |
| int ab_mask = float_cmask(a->cls) | float_cmask(b->cls); |
| |
| if (likely(ab_mask == float_cmask_normal)) { |
| FloatRelation cmp; |
| |
| if (a->sign != b->sign) { |
| goto a_sign; |
| } |
| if (a->exp == b->exp) { |
| cmp = frac_cmp(a, b); |
| } else if (a->exp < b->exp) { |
| cmp = float_relation_less; |
| } else { |
| cmp = float_relation_greater; |
| } |
| if (a->sign) { |
| cmp = -cmp; |
| } |
| return cmp; |
| } |
| |
| if (unlikely(ab_mask & float_cmask_anynan)) { |
| if (ab_mask & float_cmask_snan) { |
| float_raise(float_flag_invalid | float_flag_invalid_snan, s); |
| } else if (!is_quiet) { |
| float_raise(float_flag_invalid, s); |
| } |
| return float_relation_unordered; |
| } |
| |
| if (ab_mask & float_cmask_zero) { |
| if (ab_mask == float_cmask_zero) { |
| return float_relation_equal; |
| } else if (a->cls == float_class_zero) { |
| goto b_sign; |
| } else { |
| goto a_sign; |
| } |
| } |
| |
| if (ab_mask == float_cmask_inf) { |
| if (a->sign == b->sign) { |
| return float_relation_equal; |
| } |
| } else if (b->cls == float_class_inf) { |
| goto b_sign; |
| } else { |
| g_assert(a->cls == float_class_inf); |
| } |
| |
| a_sign: |
| return a->sign ? float_relation_less : float_relation_greater; |
| b_sign: |
| return b->sign ? float_relation_greater : float_relation_less; |
| } |
| |
| /* |
| * Multiply A by 2 raised to the power N. |
| */ |
| static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s) |
| { |
| switch (a->cls) { |
| case float_class_snan: |
| case float_class_qnan: |
| parts_return_nan(a, s); |
| break; |
| case float_class_zero: |
| case float_class_inf: |
| break; |
| case float_class_normal: |
| a->exp += MIN(MAX(n, -0x10000), 0x10000); |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| |
| /* |
| * Return log2(A) |
| */ |
| static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt) |
| { |
| uint64_t a0, a1, r, t, ign; |
| FloatPartsN f; |
| int i, n, a_exp, f_exp; |
| |
| if (unlikely(a->cls != float_class_normal)) { |
| switch (a->cls) { |
| case float_class_snan: |
| case float_class_qnan: |
| parts_return_nan(a, s); |
| return; |
| case float_class_zero: |
| float_raise(float_flag_divbyzero, s); |
| /* log2(0) = -inf */ |
| a->cls = float_class_inf; |
| a->sign = 1; |
| return; |
| case float_class_inf: |
| if (unlikely(a->sign)) { |
| goto d_nan; |
| } |
| return; |
| default: |
| break; |
| } |
| g_assert_not_reached(); |
| } |
| if (unlikely(a->sign)) { |
| goto d_nan; |
| } |
| |
| /* TODO: This algorithm looses bits too quickly for float128. */ |
| g_assert(N == 64); |
| |
| a_exp = a->exp; |
| f_exp = -1; |
| |
| r = 0; |
| t = DECOMPOSED_IMPLICIT_BIT; |
| a0 = a->frac_hi; |
| a1 = 0; |
| |
| n = fmt->frac_size + 2; |
| if (unlikely(a_exp == -1)) { |
| /* |
| * When a_exp == -1, we're computing the log2 of a value [0.5,1.0). |
| * When the value is very close to 1.0, there are lots of 1's in |
| * the msb parts of the fraction. At the end, when we subtract |
| * this value from -1.0, we can see a catastrophic loss of precision, |
| * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the |
| * bits of y in the final result. To minimize this, compute as many |
| * digits as we can. |
| * ??? This case needs another algorithm to avoid this. |
| */ |
| n = fmt->frac_size * 2 + 2; |
| /* Don't compute a value overlapping the sticky bit */ |
| n = MIN(n, 62); |
| } |
| |
| for (i = 0; i < n; i++) { |
| if (a1) { |
| mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign); |
| } else if (a0 & 0xffffffffull) { |
| mul64To128(a0, a0, &a0, &a1); |
| } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) { |
| a0 >>= 32; |
| a0 *= a0; |
| } else { |
| goto exact; |
| } |
| |
| if (a0 & DECOMPOSED_IMPLICIT_BIT) { |
| if (unlikely(a_exp == 0 && r == 0)) { |
| /* |
| * When a_exp == 0, we're computing the log2 of a value |
| * [1.0,2.0). When the value is very close to 1.0, there |
| * are lots of 0's in the msb parts of the fraction. |
| * We need to compute more digits to produce a correct |
| * result -- restart at the top of the fraction. |
| * ??? This is likely to lose precision quickly, as for |
| * float128; we may need another method. |
| */ |
| f_exp -= i; |
| t = r = DECOMPOSED_IMPLICIT_BIT; |
| i = 0; |
| } else { |
| r |= t; |
| } |
| } else { |
| add128(a0, a1, a0, a1, &a0, &a1); |
| } |
| t >>= 1; |
| } |
| |
| /* Set sticky for inexact. */ |
| r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT); |
| |
| exact: |
| parts_sint_to_float(a, a_exp, 0, s); |
| if (r == 0) { |
| return; |
| } |
| |
| memset(&f, 0, sizeof(f)); |
| f.cls = float_class_normal; |
| f.frac_hi = r; |
| f.exp = f_exp - frac_normalize(&f); |
| |
| if (a_exp < 0) { |
| parts_sub_normal(a, &f); |
| } else if (a_exp > 0) { |
| parts_add_normal(a, &f); |
| } else { |
| *a = f; |
| } |
| return; |
| |
| d_nan: |
| float_raise(float_flag_invalid, s); |
| parts_default_nan(a, s); |
| } |