Blame - fpu/softfloat-parts.c.inc - qemu

blob: bbeadaa1894771ed8402cb74a4db4fcdcfaa0df7 [file] [log] [blame]

Richard Henderson	7c45bad	2020-10-23 16:47:04 -0700	[diff] [blame]	1	/*
				2	* QEMU float support
				3	*
				4	* The code in this source file is derived from release 2a of the SoftFloat
				5	* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
				6	* some later contributions) are provided under that license, as detailed below.
				7	* It has subsequently been modified by contributors to the QEMU Project,
				8	* so some portions are provided under:
				9	* the SoftFloat-2a license
				10	* the BSD license
				11	* GPL-v2-or-later
				12	*
				13	* Any future contributions to this file after December 1st 2014 will be
				14	* taken to be licensed under the Softfloat-2a license unless specifically
				15	* indicated otherwise.
				16	*/
				17
				18	static void partsN(return_nan)(FloatPartsN a, float_status s)
				19	{
				20	switch (a->cls) {
				21	case float_class_snan:
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	22	float_raise(float_flag_invalid \| float_flag_invalid_snan, s);
Richard Henderson	7c45bad	2020-10-23 16:47:04 -0700	[diff] [blame]	23	if (s->default_nan_mode) {
				24	parts_default_nan(a, s);
				25	} else {
				26	parts_silence_nan(a, s);
				27	}
				28	break;
				29	case float_class_qnan:
				30	if (s->default_nan_mode) {
				31	parts_default_nan(a, s);
				32	}
				33	break;
				34	default:
				35	g_assert_not_reached();
				36	}
				37	}
Richard Henderson	22c355f	2020-10-23 17:03:11 -0700	[diff] [blame]	38
				39	static FloatPartsN partsN(pick_nan)(FloatPartsN a, FloatPartsN *b,
				40	float_status *s)
				41	{
				42	if (is_snan(a->cls) \|\| is_snan(b->cls)) {
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	43	float_raise(float_flag_invalid \| float_flag_invalid_snan, s);
Richard Henderson	22c355f	2020-10-23 17:03:11 -0700	[diff] [blame]	44	}
				45
				46	if (s->default_nan_mode) {
				47	parts_default_nan(a, s);
				48	} else {
				49	int cmp = frac_cmp(a, b);
				50	if (cmp == 0) {
				51	cmp = a->sign < b->sign;
				52	}
				53
				54	if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
				55	a = b;
				56	}
				57	if (is_snan(a->cls)) {
				58	parts_silence_nan(a, s);
				59	}
				60	}
				61	return a;
				62	}
Richard Henderson	979582d	2020-10-23 17:12:12 -0700	[diff] [blame]	63
				64	static FloatPartsN partsN(pick_nan_muladd)(FloatPartsN a, FloatPartsN *b,
				65	FloatPartsN c, float_status s,
				66	int ab_mask, int abc_mask)
				67	{
				68	int which;
				69
				70	if (unlikely(abc_mask & float_cmask_snan)) {
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	71	float_raise(float_flag_invalid \| float_flag_invalid_snan, s);
Richard Henderson	979582d	2020-10-23 17:12:12 -0700	[diff] [blame]	72	}
				73
				74	which = pickNaNMulAdd(a->cls, b->cls, c->cls,
				75	ab_mask == float_cmask_infzero, s);
				76
				77	if (s->default_nan_mode \|\| which == 3) {
				78	/*
				79	* Note that this check is after pickNaNMulAdd so that function
				80	* has an opportunity to set the Invalid flag for infzero.
				81	*/
				82	parts_default_nan(a, s);
				83	return a;
				84	}
				85
				86	switch (which) {
				87	case 0:
				88	break;
				89	case 1:
				90	a = b;
				91	break;
				92	case 2:
				93	a = c;
				94	break;
				95	default:
				96	g_assert_not_reached();
				97	}
				98	if (is_snan(a->cls)) {
				99	parts_silence_nan(a, s);
				100	}
				101	return a;
				102	}
Richard Henderson	d46975b	2020-11-08 13:01:55 -0800	[diff] [blame]	103
				104	/*
				105	* Canonicalize the FloatParts structure. Determine the class,
				106	* unbias the exponent, and normalize the fraction.
				107	*/
				108	static void partsN(canonicalize)(FloatPartsN p, float_status status,
				109	const FloatFmt *fmt)
				110	{
				111	if (unlikely(p->exp == 0)) {
				112	if (likely(frac_eqz(p))) {
				113	p->cls = float_class_zero;
				114	} else if (status->flush_inputs_to_zero) {
				115	float_raise(float_flag_input_denormal, status);
				116	p->cls = float_class_zero;
				117	frac_clear(p);
				118	} else {
				119	int shift = frac_normalize(p);
				120	p->cls = float_class_normal;
				121	p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
				122	}
				123	} else if (likely(p->exp < fmt->exp_max) \|\| fmt->arm_althp) {
				124	p->cls = float_class_normal;
				125	p->exp -= fmt->exp_bias;
				126	frac_shl(p, fmt->frac_shift);
				127	p->frac_hi \|= DECOMPOSED_IMPLICIT_BIT;
				128	} else if (likely(frac_eqz(p))) {
				129	p->cls = float_class_inf;
				130	} else {
				131	frac_shl(p, fmt->frac_shift);
				132	p->cls = (parts_is_snan_frac(p->frac_hi, status)
				133	? float_class_snan : float_class_qnan);
				134	}
				135	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	136
				137	/*
				138	* Round and uncanonicalize a floating-point number by parts. There
				139	* are FRAC_SHIFT bits that may require rounding at the bottom of the
				140	* fraction; these bits will be removed. The exponent will be biased
				141	* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
				142	*/
Richard Henderson	25fdedf	2020-11-20 12:11:08 -0800	[diff] [blame]	143	static void partsN(uncanon_normal)(FloatPartsN p, float_status s,
				144	const FloatFmt *fmt)
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	145	{
				146	const int exp_max = fmt->exp_max;
				147	const int frac_shift = fmt->frac_shift;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	148	const uint64_t round_mask = fmt->round_mask;
Richard Henderson	d6e1f0c	2020-11-20 18:28:31 -0800	[diff] [blame]	149	const uint64_t frac_lsb = round_mask + 1;
				150	const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
				151	const uint64_t roundeven_mask = round_mask \| frac_lsb;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	152	uint64_t inc;
Richard Henderson	25fdedf	2020-11-20 12:11:08 -0800	[diff] [blame]	153	bool overflow_norm = false;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	154	int exp, flags = 0;
				155
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	156	switch (s->float_rounding_mode) {
				157	case float_round_nearest_even:
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	158	if (N > 64 && frac_lsb == 0) {
				159	inc = ((p->frac_hi & 1) \|\| (p->frac_lo & round_mask) != frac_lsbm1
				160	? frac_lsbm1 : 0);
				161	} else {
				162	inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
				163	? frac_lsbm1 : 0);
				164	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	165	break;
				166	case float_round_ties_away:
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	167	inc = frac_lsbm1;
				168	break;
				169	case float_round_to_zero:
				170	overflow_norm = true;
				171	inc = 0;
				172	break;
				173	case float_round_up:
				174	inc = p->sign ? 0 : round_mask;
				175	overflow_norm = p->sign;
				176	break;
				177	case float_round_down:
				178	inc = p->sign ? round_mask : 0;
				179	overflow_norm = !p->sign;
				180	break;
				181	case float_round_to_odd:
				182	overflow_norm = true;
Richard Henderson	60c8f72	2021-05-25 15:58:10 -0700	[diff] [blame]	183	/* fall through */
				184	case float_round_to_odd_inf:
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	185	if (N > 64 && frac_lsb == 0) {
				186	inc = p->frac_hi & 1 ? 0 : round_mask;
				187	} else {
				188	inc = p->frac_lo & frac_lsb ? 0 : round_mask;
				189	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	190	break;
				191	default:
				192	g_assert_not_reached();
				193	}
				194
				195	exp = p->exp + fmt->exp_bias;
				196	if (likely(exp > 0)) {
				197	if (p->frac_lo & round_mask) {
				198	flags \|= float_flag_inexact;
				199	if (frac_addi(p, p, inc)) {
				200	frac_shr(p, 1);
				201	p->frac_hi \|= DECOMPOSED_IMPLICIT_BIT;
				202	exp++;
				203	}
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	204	p->frac_lo &= ~round_mask;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	205	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	206
				207	if (fmt->arm_althp) {
				208	/* ARM Alt HP eschews Inf and NaN for a wider exponent. */
				209	if (unlikely(exp > exp_max)) {
				210	/* Overflow. Return the maximum normal. */
				211	flags = float_flag_invalid;
				212	exp = exp_max;
				213	frac_allones(p);
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	214	p->frac_lo &= ~round_mask;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	215	}
				216	} else if (unlikely(exp >= exp_max)) {
				217	flags \|= float_flag_overflow \| float_flag_inexact;
				218	if (overflow_norm) {
				219	exp = exp_max - 1;
				220	frac_allones(p);
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	221	p->frac_lo &= ~round_mask;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	222	} else {
				223	p->cls = float_class_inf;
				224	exp = exp_max;
				225	frac_clear(p);
				226	}
				227	}
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	228	frac_shr(p, frac_shift);
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	229	} else if (s->flush_to_zero) {
				230	flags \|= float_flag_output_denormal;
				231	p->cls = float_class_zero;
				232	exp = 0;
				233	frac_clear(p);
				234	} else {
				235	bool is_tiny = s->tininess_before_rounding \|\| exp < 0;
				236
				237	if (!is_tiny) {
				238	FloatPartsN discard;
				239	is_tiny = !frac_addi(&discard, p, inc);
				240	}
				241
				242	frac_shrjam(p, 1 - exp);
				243
				244	if (p->frac_lo & round_mask) {
				245	/* Need to recompute round-to-even/round-to-odd. */
				246	switch (s->float_rounding_mode) {
				247	case float_round_nearest_even:
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	248	if (N > 64 && frac_lsb == 0) {
				249	inc = ((p->frac_hi & 1) \|\|
				250	(p->frac_lo & round_mask) != frac_lsbm1
				251	? frac_lsbm1 : 0);
				252	} else {
				253	inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
				254	? frac_lsbm1 : 0);
				255	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	256	break;
				257	case float_round_to_odd:
Richard Henderson	60c8f72	2021-05-25 15:58:10 -0700	[diff] [blame]	258	case float_round_to_odd_inf:
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	259	if (N > 64 && frac_lsb == 0) {
				260	inc = p->frac_hi & 1 ? 0 : round_mask;
				261	} else {
				262	inc = p->frac_lo & frac_lsb ? 0 : round_mask;
				263	}
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	264	break;
				265	default:
				266	break;
				267	}
				268	flags \|= float_flag_inexact;
				269	frac_addi(p, p, inc);
Richard Henderson	98b3cff	2020-11-21 16:35:54 -0800	[diff] [blame]	270	p->frac_lo &= ~round_mask;
Richard Henderson	ee6959f	2020-10-23 17:53:55 -0700	[diff] [blame]	271	}
				272
				273	exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
				274	frac_shr(p, frac_shift);
				275
				276	if (is_tiny && (flags & float_flag_inexact)) {
				277	flags \|= float_flag_underflow;
				278	}
				279	if (exp == 0 && frac_eqz(p)) {
				280	p->cls = float_class_zero;
				281	}
				282	}
				283	p->exp = exp;
				284	float_raise(flags, s);
				285	}
Richard Henderson	da10a90	2020-10-22 15:22:55 -0700	[diff] [blame]	286
Richard Henderson	25fdedf	2020-11-20 12:11:08 -0800	[diff] [blame]	287	static void partsN(uncanon)(FloatPartsN p, float_status s,
				288	const FloatFmt *fmt)
				289	{
				290	if (likely(p->cls == float_class_normal)) {
				291	parts_uncanon_normal(p, s, fmt);
				292	} else {
				293	switch (p->cls) {
				294	case float_class_zero:
				295	p->exp = 0;
				296	frac_clear(p);
				297	return;
				298	case float_class_inf:
				299	g_assert(!fmt->arm_althp);
				300	p->exp = fmt->exp_max;
				301	frac_clear(p);
				302	return;
				303	case float_class_qnan:
				304	case float_class_snan:
				305	g_assert(!fmt->arm_althp);
				306	p->exp = fmt->exp_max;
				307	frac_shr(p, fmt->frac_shift);
				308	return;
				309	default:
				310	break;
				311	}
				312	g_assert_not_reached();
				313	}
				314	}
				315
Richard Henderson	da10a90	2020-10-22 15:22:55 -0700	[diff] [blame]	316	/*
				317	* Returns the result of adding or subtracting the values of the
				318	* floating-point values `a' and `b'. The operation is performed
				319	* according to the IEC/IEEE Standard for Binary Floating-Point
				320	* Arithmetic.
				321	*/
				322	static FloatPartsN partsN(addsub)(FloatPartsN a, FloatPartsN *b,
				323	float_status *s, bool subtract)
				324	{
				325	bool b_sign = b->sign ^ subtract;
				326	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				327
				328	if (a->sign != b_sign) {
				329	/* Subtraction */
				330	if (likely(ab_mask == float_cmask_normal)) {
				331	if (parts_sub_normal(a, b)) {
				332	return a;
				333	}
				334	/* Subtract was exact, fall through to set sign. */
				335	ab_mask = float_cmask_zero;
				336	}
				337
				338	if (ab_mask == float_cmask_zero) {
				339	a->sign = s->float_rounding_mode == float_round_down;
				340	return a;
				341	}
				342
				343	if (unlikely(ab_mask & float_cmask_anynan)) {
				344	goto p_nan;
				345	}
				346
				347	if (ab_mask & float_cmask_inf) {
				348	if (a->cls != float_class_inf) {
				349	/* N - Inf */
				350	goto return_b;
				351	}
				352	if (b->cls != float_class_inf) {
				353	/* Inf - N */
				354	return a;
				355	}
				356	/* Inf - Inf */
Richard Henderson	ba11446	2021-12-17 17:57:14 +0100	[diff] [blame]	357	float_raise(float_flag_invalid \| float_flag_invalid_isi, s);
Richard Henderson	da10a90	2020-10-22 15:22:55 -0700	[diff] [blame]	358	parts_default_nan(a, s);
				359	return a;
				360	}
				361	} else {
				362	/* Addition */
				363	if (likely(ab_mask == float_cmask_normal)) {
				364	parts_add_normal(a, b);
				365	return a;
				366	}
				367
				368	if (ab_mask == float_cmask_zero) {
				369	return a;
				370	}
				371
				372	if (unlikely(ab_mask & float_cmask_anynan)) {
				373	goto p_nan;
				374	}
				375
				376	if (ab_mask & float_cmask_inf) {
				377	a->cls = float_class_inf;
				378	return a;
				379	}
				380	}
				381
				382	if (b->cls == float_class_zero) {
				383	g_assert(a->cls == float_class_normal);
				384	return a;
				385	}
				386
				387	g_assert(a->cls == float_class_zero);
				388	g_assert(b->cls == float_class_normal);
				389	return_b:
				390	b->sign = b_sign;
				391	return b;
				392
				393	p_nan:
				394	return parts_pick_nan(a, b, s);
				395	}
Richard Henderson	aca8452	2020-11-11 20:44:57 -0800	[diff] [blame]	396
				397	/*
				398	* Returns the result of multiplying the floating-point values `a' and
				399	* `b'. The operation is performed according to the IEC/IEEE Standard
				400	* for Binary Floating-Point Arithmetic.
				401	*/
				402	static FloatPartsN partsN(mul)(FloatPartsN a, FloatPartsN *b,
				403	float_status *s)
				404	{
				405	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				406	bool sign = a->sign ^ b->sign;
				407
				408	if (likely(ab_mask == float_cmask_normal)) {
				409	FloatPartsW tmp;
				410
				411	frac_mulw(&tmp, a, b);
				412	frac_truncjam(a, &tmp);
				413
				414	a->exp += b->exp + 1;
				415	if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
				416	frac_add(a, a, a);
				417	a->exp -= 1;
				418	}
				419
				420	a->sign = sign;
				421	return a;
				422	}
				423
				424	/* Inf * Zero == NaN */
				425	if (unlikely(ab_mask == float_cmask_infzero)) {
Richard Henderson	bead3c9	2021-12-17 17:57:14 +0100	[diff] [blame]	426	float_raise(float_flag_invalid \| float_flag_invalid_imz, s);
Richard Henderson	aca8452	2020-11-11 20:44:57 -0800	[diff] [blame]	427	parts_default_nan(a, s);
				428	return a;
				429	}
				430
				431	if (unlikely(ab_mask & float_cmask_anynan)) {
				432	return parts_pick_nan(a, b, s);
				433	}
				434
				435	/* Multiply by 0 or Inf */
				436	if (ab_mask & float_cmask_inf) {
				437	a->cls = float_class_inf;
				438	a->sign = sign;
				439	return a;
				440	}
				441
				442	g_assert(ab_mask & float_cmask_zero);
				443	a->cls = float_class_zero;
				444	a->sign = sign;
				445	return a;
				446	}
Richard Henderson	dedd123	2020-10-24 06:04:19 -0700	[diff] [blame]	447
				448	/*
				449	* Returns the result of multiplying the floating-point values `a' and
				450	* `b' then adding 'c', with no intermediate rounding step after the
				451	* multiplication. The operation is performed according to the
				452	* IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
				453	* The flags argument allows the caller to select negation of the
				454	* addend, the intermediate product, or the final result. (The
				455	* difference between this and having the caller do a separate
				456	* negation is that negating externally will flip the sign bit on NaNs.)
				457	*
				458	* Requires A and C extracted into a double-sized structure to provide the
				459	* extra space for the widening multiply.
				460	*/
				461	static FloatPartsN partsN(muladd)(FloatPartsN a, FloatPartsN *b,
				462	FloatPartsN c, int flags, float_status s)
				463	{
				464	int ab_mask, abc_mask;
				465	FloatPartsW p_widen, c_widen;
				466
				467	ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				468	abc_mask = float_cmask(c->cls) \| ab_mask;
				469
				470	/*
				471	* It is implementation-defined whether the cases of (0,inf,qnan)
				472	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				473	* they return if they do), so we have to hand this information
				474	* off to the target-specific pick-a-NaN routine.
				475	*/
				476	if (unlikely(abc_mask & float_cmask_anynan)) {
				477	return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
				478	}
				479
				480	if (flags & float_muladd_negate_c) {
				481	c->sign ^= 1;
				482	}
				483
				484	/* Compute the sign of the product into A. */
				485	a->sign ^= b->sign;
				486	if (flags & float_muladd_negate_product) {
				487	a->sign ^= 1;
				488	}
				489
				490	if (unlikely(ab_mask != float_cmask_normal)) {
				491	if (unlikely(ab_mask == float_cmask_infzero)) {
Richard Henderson	bead3c9	2021-12-17 17:57:14 +0100	[diff] [blame]	492	float_raise(float_flag_invalid \| float_flag_invalid_imz, s);
Richard Henderson	dedd123	2020-10-24 06:04:19 -0700	[diff] [blame]	493	goto d_nan;
				494	}
				495
				496	if (ab_mask & float_cmask_inf) {
				497	if (c->cls == float_class_inf && a->sign != c->sign) {
Richard Henderson	ba11446	2021-12-17 17:57:14 +0100	[diff] [blame]	498	float_raise(float_flag_invalid \| float_flag_invalid_isi, s);
Richard Henderson	dedd123	2020-10-24 06:04:19 -0700	[diff] [blame]	499	goto d_nan;
				500	}
				501	goto return_inf;
				502	}
				503
				504	g_assert(ab_mask & float_cmask_zero);
				505	if (c->cls == float_class_normal) {
				506	a = c;
				507	goto return_normal;
				508	}
				509	if (c->cls == float_class_zero) {
				510	if (a->sign != c->sign) {
				511	goto return_sub_zero;
				512	}
				513	goto return_zero;
				514	}
				515	g_assert(c->cls == float_class_inf);
				516	}
				517
				518	if (unlikely(c->cls == float_class_inf)) {
				519	a->sign = c->sign;
				520	goto return_inf;
				521	}
				522
				523	/* Perform the multiplication step. */
				524	p_widen.sign = a->sign;
				525	p_widen.exp = a->exp + b->exp + 1;
				526	frac_mulw(&p_widen, a, b);
				527	if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
				528	frac_add(&p_widen, &p_widen, &p_widen);
				529	p_widen.exp -= 1;
				530	}
				531
				532	/* Perform the addition step. */
				533	if (c->cls != float_class_zero) {
				534	/* Zero-extend C to less significant bits. */
				535	frac_widen(&c_widen, c);
				536	c_widen.exp = c->exp;
				537
				538	if (a->sign == c->sign) {
				539	parts_add_normal(&p_widen, &c_widen);
				540	} else if (!parts_sub_normal(&p_widen, &c_widen)) {
				541	goto return_sub_zero;
				542	}
				543	}
				544
				545	/* Narrow with sticky bit, for proper rounding later. */
				546	frac_truncjam(a, &p_widen);
				547	a->sign = p_widen.sign;
				548	a->exp = p_widen.exp;
				549
				550	return_normal:
				551	if (flags & float_muladd_halve_result) {
				552	a->exp -= 1;
				553	}
				554	finish_sign:
				555	if (flags & float_muladd_negate_result) {
				556	a->sign ^= 1;
				557	}
				558	return a;
				559
				560	return_sub_zero:
				561	a->sign = s->float_rounding_mode == float_round_down;
				562	return_zero:
				563	a->cls = float_class_zero;
				564	goto finish_sign;
				565
				566	return_inf:
				567	a->cls = float_class_inf;
				568	goto finish_sign;
				569
				570	d_nan:
Richard Henderson	dedd123	2020-10-24 06:04:19 -0700	[diff] [blame]	571	parts_default_nan(a, s);
				572	return a;
				573	}
Richard Henderson	ec961b8	2020-11-11 12:50:44 -0800	[diff] [blame]	574
				575	/*
				576	* Returns the result of dividing the floating-point value `a' by the
				577	* corresponding value `b'. The operation is performed according to
				578	* the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				579	*/
				580	static FloatPartsN partsN(div)(FloatPartsN a, FloatPartsN *b,
				581	float_status *s)
				582	{
				583	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				584	bool sign = a->sign ^ b->sign;
				585
				586	if (likely(ab_mask == float_cmask_normal)) {
				587	a->sign = sign;
				588	a->exp -= b->exp + frac_div(a, b);
				589	return a;
				590	}
				591
				592	/* 0/0 or Inf/Inf => NaN */
Richard Henderson	10cc964	2021-12-17 17:57:14 +0100	[diff] [blame]	593	if (unlikely(ab_mask == float_cmask_zero)) {
				594	float_raise(float_flag_invalid \| float_flag_invalid_zdz, s);
				595	goto d_nan;
				596	}
				597	if (unlikely(ab_mask == float_cmask_inf)) {
				598	float_raise(float_flag_invalid \| float_flag_invalid_idi, s);
				599	goto d_nan;
Richard Henderson	ec961b8	2020-11-11 12:50:44 -0800	[diff] [blame]	600	}
				601
				602	/* All the NaN cases */
				603	if (unlikely(ab_mask & float_cmask_anynan)) {
				604	return parts_pick_nan(a, b, s);
				605	}
				606
				607	a->sign = sign;
				608
				609	/* Inf / X */
				610	if (a->cls == float_class_inf) {
				611	return a;
				612	}
				613
				614	/* 0 / X */
				615	if (a->cls == float_class_zero) {
				616	return a;
				617	}
				618
				619	/* X / Inf */
				620	if (b->cls == float_class_inf) {
				621	a->cls = float_class_zero;
				622	return a;
				623	}
				624
				625	/* X / 0 => Inf */
				626	g_assert(b->cls == float_class_zero);
				627	float_raise(float_flag_divbyzero, s);
				628	a->cls = float_class_inf;
				629	return a;
Richard Henderson	10cc964	2021-12-17 17:57:14 +0100	[diff] [blame]	630
				631	d_nan:
				632	parts_default_nan(a, s);
				633	return a;
Richard Henderson	ec961b8	2020-11-11 12:50:44 -0800	[diff] [blame]	634	}
Richard Henderson	afc3493	2020-11-14 12:53:12 -0800	[diff] [blame]	635
				636	/*
Richard Henderson	feaf2e9	2021-05-07 18:40:28 -0700	[diff] [blame]	637	* Floating point remainder, per IEC/IEEE, or modulus.
				638	*/
				639	static FloatPartsN partsN(modrem)(FloatPartsN a, FloatPartsN *b,
				640	uint64_t mod_quot, float_status s)
				641	{
				642	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				643
				644	if (likely(ab_mask == float_cmask_normal)) {
				645	frac_modrem(a, b, mod_quot);
				646	return a;
				647	}
				648
				649	if (mod_quot) {
				650	*mod_quot = 0;
				651	}
				652
				653	/* All the NaN cases */
				654	if (unlikely(ab_mask & float_cmask_anynan)) {
				655	return parts_pick_nan(a, b, s);
				656	}
				657
				658	/* Inf % N; N % 0 */
				659	if (a->cls == float_class_inf \|\| b->cls == float_class_zero) {
				660	float_raise(float_flag_invalid, s);
				661	parts_default_nan(a, s);
				662	return a;
				663	}
				664
				665	/* N % Inf; 0 % N */
				666	g_assert(b->cls == float_class_inf \|\| a->cls == float_class_zero);
				667	return a;
				668	}
				669
				670	/*
Richard Henderson	9261b24	2020-11-18 12:14:37 -0800	[diff] [blame]	671	* Square Root
				672	*
				673	* The base algorithm is lifted from
				674	* https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
				675	* https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
				676	* https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
				677	* and is thus MIT licenced.
				678	*/
				679	static void partsN(sqrt)(FloatPartsN a, float_status status,
				680	const FloatFmt *fmt)
				681	{
				682	const uint32_t three32 = 3u << 30;
				683	const uint64_t three64 = 3ull << 62;
				684	uint32_t d32, m32, r32, s32, u32; /* 32-bit computation */
				685	uint64_t d64, m64, r64, s64, u64; /* 64-bit computation */
				686	uint64_t dh, dl, rh, rl, sh, sl, uh, ul; /* 128-bit computation */
				687	uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
				688	uint64_t discard;
				689	bool exp_odd;
				690	size_t index;
				691
				692	if (unlikely(a->cls != float_class_normal)) {
				693	switch (a->cls) {
				694	case float_class_snan:
				695	case float_class_qnan:
				696	parts_return_nan(a, status);
				697	return;
				698	case float_class_zero:
				699	return;
				700	case float_class_inf:
				701	if (unlikely(a->sign)) {
				702	goto d_nan;
				703	}
				704	return;
				705	default:
				706	g_assert_not_reached();
				707	}
				708	}
				709
				710	if (unlikely(a->sign)) {
				711	goto d_nan;
				712	}
				713
				714	/*
				715	* Argument reduction.
				716	* x = 4^e frac; with integer e, and frac in [1, 4)
				717	* m = frac fixed point at bit 62, since we're in base 4.
				718	* If base-2 exponent is odd, exchange that for multiply by 2,
				719	* which results in no shift.
				720	*/
				721	exp_odd = a->exp & 1;
				722	index = extract64(a->frac_hi, 57, 6) \| (!exp_odd << 6);
				723	if (!exp_odd) {
				724	frac_shr(a, 1);
				725	}
				726
				727	/*
				728	* Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
				729	*
				730	* Initial estimate:
				731	* 7-bit lookup table (1-bit exponent and 6-bit significand).
				732	*
				733	* The relative error (e = r0*sqrt(m)-1) of a linear estimate
				734	* (r0 = a*m + b) is \|e\| < 0.085955 ~ 0x1.6p-4 at best;
				735	* a table lookup is faster and needs one less iteration.
				736	* The 7-bit table gives \|e\| < 0x1.fdp-9.
				737	*
				738	* A Newton-Raphson iteration for r is
				739	* s = m*r
				740	* d = s*r
				741	* u = 3 - d
				742	* r = r*u/2
				743	*
				744	* Fixed point representations:
				745	* m, s, d, u, three are all 2.30; r is 0.32
				746	*/
				747	m64 = a->frac_hi;
				748	m32 = m64 >> 32;
				749
				750	r32 = rsqrt_tab[index] << 16;
				751	/* \|rsqrt(m) - 1\| < 0x1.FDp-9 /
				752
				753	s32 = ((uint64_t)m32 * r32) >> 32;
				754	d32 = ((uint64_t)s32 * r32) >> 32;
				755	u32 = three32 - d32;
				756
				757	if (N == 64) {
				758	/* float64 or smaller */
				759
				760	r32 = ((uint64_t)r32 * u32) >> 31;
				761	/* \|rsqrt(m) - 1\| < 0x1.7Bp-16 /
				762
				763	s32 = ((uint64_t)m32 * r32) >> 32;
				764	d32 = ((uint64_t)s32 * r32) >> 32;
				765	u32 = three32 - d32;
				766
				767	if (fmt->frac_size <= 23) {
				768	/* float32 or smaller */
				769
				770	s32 = ((uint64_t)s32 * u32) >> 32; /* 3.29 */
				771	s32 = (s32 - 1) >> 6; /* 9.23 */
				772	/* s < sqrt(m) < s + 0x1.08p-23 */
				773
				774	/* compute nearest rounded result to 2.23 bits */
				775	uint32_t d0 = (m32 << 16) - s32 * s32;
				776	uint32_t d1 = s32 - d0;
				777	uint32_t d2 = d1 + s32 + 1;
				778	s32 += d1 >> 31;
				779	a->frac_hi = (uint64_t)s32 << (64 - 25);
				780
				781	/* increment or decrement for inexact */
				782	if (d2 != 0) {
				783	a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
				784	}
				785	goto done;
				786	}
				787
				788	/* float64 */
				789
				790	r64 = (uint64_t)r32 * u32 * 2;
				791	/* \|rsqrt(m) - 1\| < 0x1.37-p29; convert to 64-bit arithmetic /
				792	mul64To128(m64, r64, &s64, &discard);
				793	mul64To128(s64, r64, &d64, &discard);
				794	u64 = three64 - d64;
				795
				796	mul64To128(s64, u64, &s64, &discard); /* 3.61 */
				797	s64 = (s64 - 2) >> 9; /* 12.52 */
				798
				799	/* Compute nearest rounded result */
				800	uint64_t d0 = (m64 << 42) - s64 * s64;
				801	uint64_t d1 = s64 - d0;
				802	uint64_t d2 = d1 + s64 + 1;
				803	s64 += d1 >> 63;
				804	a->frac_hi = s64 << (64 - 54);
				805
				806	/* increment or decrement for inexact */
				807	if (d2 != 0) {
				808	a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
				809	}
				810	goto done;
				811	}
				812
				813	r64 = (uint64_t)r32 * u32 * 2;
				814	/* \|rsqrt(m) - 1\| < 0x1.7Bp-16; convert to 64-bit arithmetic /
				815
				816	mul64To128(m64, r64, &s64, &discard);
				817	mul64To128(s64, r64, &d64, &discard);
				818	u64 = three64 - d64;
				819	mul64To128(u64, r64, &r64, &discard);
				820	r64 <<= 1;
				821	/* \|rsqrt(m) - 1\| < 0x1.a5p-31 /
				822
				823	mul64To128(m64, r64, &s64, &discard);
				824	mul64To128(s64, r64, &d64, &discard);
				825	u64 = three64 - d64;
				826	mul64To128(u64, r64, &rh, &rl);
				827	add128(rh, rl, rh, rl, &rh, &rl);
				828	/* \|rsqrt(m) - 1\| < 0x1.c001p-59; change to 128-bit arithmetic /
				829
				830	mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
				831	mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
				832	sub128(three64, 0, dh, dl, &uh, &ul);
				833	mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard); /* 3.125 */
				834	/* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
				835
				836	sub128(sh, sl, 0, 4, &sh, &sl);
				837	shift128Right(sh, sl, 13, &sh, &sl); /* 16.112 */
				838	/* s < sqrt(m) < s + 1ulp */
				839
				840	/* Compute nearest rounded result */
				841	mul64To128(sl, sl, &d0h, &d0l);
				842	d0h += 2 * sh * sl;
				843	sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
				844	sub128(sh, sl, d0h, d0l, &d1h, &d1l);
				845	add128(sh, sl, 0, 1, &d2h, &d2l);
				846	add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
				847	add128(sh, sl, 0, d1h >> 63, &sh, &sl);
				848	shift128Left(sh, sl, 128 - 114, &sh, &sl);
				849
				850	/* increment or decrement for inexact */
				851	if (d2h \| d2l) {
				852	if ((int64_t)(d1h ^ d2h) < 0) {
				853	sub128(sh, sl, 0, 1, &sh, &sl);
				854	} else {
				855	add128(sh, sl, 0, 1, &sh, &sl);
				856	}
				857	}
				858	a->frac_lo = sl;
				859	a->frac_hi = sh;
				860
				861	done:
				862	/* Convert back from base 4 to base 2. */
				863	a->exp >>= 1;
				864	if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
				865	frac_add(a, a, a);
				866	} else {
				867	a->exp += 1;
				868	}
				869	return;
				870
				871	d_nan:
Richard Henderson	f8718aa	2021-12-17 17:57:14 +0100	[diff] [blame]	872	float_raise(float_flag_invalid \| float_flag_invalid_sqrt, status);
Richard Henderson	9261b24	2020-11-18 12:14:37 -0800	[diff] [blame]	873	parts_default_nan(a, status);
				874	}
				875
				876	/*
Richard Henderson	afc3493	2020-11-14 12:53:12 -0800	[diff] [blame]	877	* Rounds the floating-point value `a' to an integer, and returns the
				878	* result as a floating-point value. The operation is performed
				879	* according to the IEC/IEEE Standard for Binary Floating-Point
				880	* Arithmetic.
				881	*
				882	* parts_round_to_int_normal is an internal helper function for
				883	* normal numbers only, returning true for inexact but not directly
				884	* raising float_flag_inexact.
				885	*/
				886	static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
				887	int scale, int frac_size)
				888	{
				889	uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
				890	int shift_adj;
				891
				892	scale = MIN(MAX(scale, -0x10000), 0x10000);
				893	a->exp += scale;
				894
				895	if (a->exp < 0) {
				896	bool one;
				897
				898	/* All fractional */
				899	switch (rmode) {
				900	case float_round_nearest_even:
				901	one = false;
				902	if (a->exp == -1) {
				903	FloatPartsN tmp;
				904	/* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
				905	frac_add(&tmp, a, a);
				906	/* Anything remaining means frac > 0.5. */
				907	one = !frac_eqz(&tmp);
				908	}
				909	break;
				910	case float_round_ties_away:
				911	one = a->exp == -1;
				912	break;
				913	case float_round_to_zero:
				914	one = false;
				915	break;
				916	case float_round_up:
				917	one = !a->sign;
				918	break;
				919	case float_round_down:
				920	one = a->sign;
				921	break;
				922	case float_round_to_odd:
				923	one = true;
				924	break;
				925	default:
				926	g_assert_not_reached();
				927	}
				928
				929	frac_clear(a);
				930	a->exp = 0;
				931	if (one) {
				932	a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
				933	} else {
				934	a->cls = float_class_zero;
				935	}
				936	return true;
				937	}
				938
				939	if (a->exp >= frac_size) {
				940	/* All integral */
				941	return false;
				942	}
				943
				944	if (N > 64 && a->exp < N - 64) {
				945	/*
				946	* Rounding is not in the low word -- shift lsb to bit 2,
				947	* which leaves room for sticky and rounding bit.
				948	*/
				949	shift_adj = (N - 1) - (a->exp + 2);
				950	frac_shrjam(a, shift_adj);
				951	frac_lsb = 1 << 2;
				952	} else {
				953	shift_adj = 0;
				954	frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
				955	}
				956
				957	frac_lsbm1 = frac_lsb >> 1;
				958	rnd_mask = frac_lsb - 1;
				959	rnd_even_mask = rnd_mask \| frac_lsb;
				960
				961	if (!(a->frac_lo & rnd_mask)) {
				962	/* Fractional bits already clear, undo the shift above. */
				963	frac_shl(a, shift_adj);
				964	return false;
				965	}
				966
				967	switch (rmode) {
				968	case float_round_nearest_even:
				969	inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
				970	break;
				971	case float_round_ties_away:
				972	inc = frac_lsbm1;
				973	break;
				974	case float_round_to_zero:
				975	inc = 0;
				976	break;
				977	case float_round_up:
				978	inc = a->sign ? 0 : rnd_mask;
				979	break;
				980	case float_round_down:
				981	inc = a->sign ? rnd_mask : 0;
				982	break;
				983	case float_round_to_odd:
				984	inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
				985	break;
				986	default:
				987	g_assert_not_reached();
				988	}
				989
				990	if (shift_adj == 0) {
				991	if (frac_addi(a, a, inc)) {
				992	frac_shr(a, 1);
				993	a->frac_hi \|= DECOMPOSED_IMPLICIT_BIT;
				994	a->exp++;
				995	}
				996	a->frac_lo &= ~rnd_mask;
				997	} else {
				998	frac_addi(a, a, inc);
				999	a->frac_lo &= ~rnd_mask;
				1000	/* Be careful shifting back, not to overflow */
				1001	frac_shl(a, shift_adj - 1);
				1002	if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
				1003	a->exp++;
				1004	} else {
				1005	frac_add(a, a, a);
				1006	}
				1007	}
				1008	return true;
				1009	}
				1010
				1011	static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
				1012	int scale, float_status *s,
				1013	const FloatFmt *fmt)
				1014	{
				1015	switch (a->cls) {
				1016	case float_class_qnan:
				1017	case float_class_snan:
				1018	parts_return_nan(a, s);
				1019	break;
				1020	case float_class_zero:
				1021	case float_class_inf:
				1022	break;
				1023	case float_class_normal:
				1024	if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
				1025	float_raise(float_flag_inexact, s);
				1026	}
				1027	break;
				1028	default:
				1029	g_assert_not_reached();
				1030	}
				1031	}
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1032
				1033	/*
				1034	* Returns the result of converting the floating-point value `a' to
				1035	* the two's complement integer format. The conversion is performed
				1036	* according to the IEC/IEEE Standard for Binary Floating-Point
				1037	* Arithmetic---which means in particular that the conversion is
				1038	* rounded according to the current rounding mode. If `a' is a NaN,
				1039	* the largest positive integer is returned. Otherwise, if the
				1040	* conversion overflows, the largest integer with the same sign as `a'
				1041	* is returned.
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1042	*/
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1043	static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
				1044	int scale, int64_t min, int64_t max,
				1045	float_status *s)
				1046	{
				1047	int flags = 0;
				1048	uint64_t r;
				1049
				1050	switch (p->cls) {
				1051	case float_class_snan:
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	1052	flags \|= float_flag_invalid_snan;
				1053	/* fall through */
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1054	case float_class_qnan:
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	1055	flags \|= float_flag_invalid;
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1056	r = max;
				1057	break;
				1058
				1059	case float_class_inf:
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1060	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1061	r = p->sign ? min : max;
				1062	break;
				1063
				1064	case float_class_zero:
				1065	return 0;
				1066
				1067	case float_class_normal:
				1068	/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
				1069	if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
				1070	flags = float_flag_inexact;
				1071	}
				1072
				1073	if (p->exp <= DECOMPOSED_BINARY_POINT) {
				1074	r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
				1075	} else {
				1076	r = UINT64_MAX;
				1077	}
				1078	if (p->sign) {
				1079	if (r <= -(uint64_t)min) {
				1080	r = -r;
				1081	} else {
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1082	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1083	r = min;
				1084	}
				1085	} else if (r > max) {
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1086	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	463b3f0	2020-11-14 13:21:43 -0800	[diff] [blame]	1087	r = max;
				1088	}
				1089	break;
				1090
				1091	default:
				1092	g_assert_not_reached();
				1093	}
				1094
				1095	float_raise(flags, s);
				1096	return r;
				1097	}
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1098
				1099	/*
				1100	* Returns the result of converting the floating-point value `a' to
				1101	* the unsigned integer format. The conversion is performed according
				1102	* to the IEC/IEEE Standard for Binary Floating-Point
				1103	* Arithmetic---which means in particular that the conversion is
				1104	* rounded according to the current rounding mode. If `a' is a NaN,
				1105	* the largest unsigned integer is returned. Otherwise, if the
				1106	* conversion overflows, the largest unsigned integer is returned. If
				1107	* the 'a' is negative, the result is rounded and zero is returned;
				1108	* values that do not round to zero will raise the inexact exception
				1109	* flag.
				1110	*/
				1111	static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
				1112	int scale, uint64_t max, float_status *s)
				1113	{
				1114	int flags = 0;
				1115	uint64_t r;
				1116
				1117	switch (p->cls) {
				1118	case float_class_snan:
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	1119	flags \|= float_flag_invalid_snan;
				1120	/* fall through */
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1121	case float_class_qnan:
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	1122	flags \|= float_flag_invalid;
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1123	r = max;
				1124	break;
				1125
				1126	case float_class_inf:
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1127	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1128	r = p->sign ? 0 : max;
				1129	break;
				1130
				1131	case float_class_zero:
				1132	return 0;
				1133
				1134	case float_class_normal:
				1135	/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
				1136	if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
				1137	flags = float_flag_inexact;
				1138	if (p->cls == float_class_zero) {
				1139	r = 0;
				1140	break;
				1141	}
				1142	}
				1143
				1144	if (p->sign) {
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1145	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1146	r = 0;
				1147	} else if (p->exp > DECOMPOSED_BINARY_POINT) {
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1148	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1149	r = max;
				1150	} else {
				1151	r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
				1152	if (r > max) {
Richard Henderson	81254b0	2021-12-17 17:57:14 +0100	[diff] [blame]	1153	flags = float_flag_invalid \| float_flag_invalid_cvti;
Richard Henderson	4ab4aef	2020-11-14 14:21:16 -0800	[diff] [blame]	1154	r = max;
				1155	}
				1156	}
				1157	break;
				1158
				1159	default:
				1160	g_assert_not_reached();
				1161	}
				1162
				1163	float_raise(flags, s);
				1164	return r;
				1165	}
Richard Henderson	e368951	2020-11-14 14:40:27 -0800	[diff] [blame]	1166
				1167	/*
				1168	* Integer to float conversions
				1169	*
				1170	* Returns the result of converting the two's complement integer `a'
				1171	* to the floating-point format. The conversion is performed according
				1172	* to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1173	*/
				1174	static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
				1175	int scale, float_status *s)
				1176	{
				1177	uint64_t f = a;
				1178	int shift;
				1179
				1180	memset(p, 0, sizeof(*p));
				1181
				1182	if (a == 0) {
				1183	p->cls = float_class_zero;
				1184	return;
				1185	}
				1186
				1187	p->cls = float_class_normal;
				1188	if (a < 0) {
				1189	f = -f;
				1190	p->sign = true;
				1191	}
				1192	shift = clz64(f);
				1193	scale = MIN(MAX(scale, -0x10000), 0x10000);
				1194
				1195	p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
				1196	p->frac_hi = f << shift;
				1197	}
Richard Henderson	37c954a	2020-11-14 14:48:31 -0800	[diff] [blame]	1198
				1199	/*
				1200	* Unsigned Integer to float conversions
				1201	*
				1202	* Returns the result of converting the unsigned integer `a' to the
				1203	* floating-point format. The conversion is performed according to the
				1204	* IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1205	*/
				1206	static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
				1207	int scale, float_status *status)
				1208	{
				1209	memset(p, 0, sizeof(*p));
				1210
				1211	if (a == 0) {
				1212	p->cls = float_class_zero;
				1213	} else {
				1214	int shift = clz64(a);
				1215	scale = MIN(MAX(scale, -0x10000), 0x10000);
				1216	p->cls = float_class_normal;
				1217	p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
				1218	p->frac_hi = a << shift;
				1219	}
				1220	}
Richard Henderson	e1c4667	2020-11-14 16:52:38 -0800	[diff] [blame]	1221
				1222	/*
				1223	* Float min/max.
				1224	*/
				1225	static FloatPartsN partsN(minmax)(FloatPartsN a, FloatPartsN *b,
				1226	float_status *s, int flags)
				1227	{
				1228	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
				1229	int a_exp, b_exp, cmp;
				1230
				1231	if (unlikely(ab_mask & float_cmask_anynan)) {
				1232	/*
Chih-Min Chao	0e90303	2021-10-22 00:08:45 +0800	[diff] [blame]	1233	* For minNum/maxNum (IEEE 754-2008)
				1234	* or minimumNumber/maximumNumber (IEEE 754-2019),
				1235	* if one operand is a QNaN, and the other
Richard Henderson	e1c4667	2020-11-14 16:52:38 -0800	[diff] [blame]	1236	* operand is numerical, then return numerical argument.
				1237	*/
Chih-Min Chao	0e90303	2021-10-22 00:08:45 +0800	[diff] [blame]	1238	if ((flags & (minmax_isnum \| minmax_isnumber))
Richard Henderson	e1c4667	2020-11-14 16:52:38 -0800	[diff] [blame]	1239	&& !(ab_mask & float_cmask_snan)
				1240	&& (ab_mask & ~float_cmask_qnan)) {
				1241	return is_nan(a->cls) ? b : a;
				1242	}
Chih-Min Chao	0e90303	2021-10-22 00:08:45 +0800	[diff] [blame]	1243
				1244	/*
				1245	* In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
				1246	* are removed and replaced with minimum, minimumNumber, maximum
				1247	* and maximumNumber.
				1248	* minimumNumber/maximumNumber behavior for SNaN is changed to:
				1249	* If both operands are NaNs, a QNaN is returned.
				1250	* If either operand is a SNaN,
				1251	* an invalid operation exception is signaled,
				1252	* but unless both operands are NaNs,
				1253	* the SNaN is otherwise ignored and not converted to a QNaN.
				1254	*/
				1255	if ((flags & minmax_isnumber)
				1256	&& (ab_mask & float_cmask_snan)
				1257	&& (ab_mask & ~float_cmask_anynan)) {
				1258	float_raise(float_flag_invalid, s);
				1259	return is_nan(a->cls) ? b : a;
				1260	}
				1261
Richard Henderson	e1c4667	2020-11-14 16:52:38 -0800	[diff] [blame]	1262	return parts_pick_nan(a, b, s);
				1263	}
				1264
				1265	a_exp = a->exp;
				1266	b_exp = b->exp;
				1267
				1268	if (unlikely(ab_mask != float_cmask_normal)) {
				1269	switch (a->cls) {
				1270	case float_class_normal:
				1271	break;
				1272	case float_class_inf:
				1273	a_exp = INT16_MAX;
				1274	break;
				1275	case float_class_zero:
				1276	a_exp = INT16_MIN;
				1277	break;
				1278	default:
				1279	g_assert_not_reached();
				1280	break;
				1281	}
				1282	switch (b->cls) {
				1283	case float_class_normal:
				1284	break;
				1285	case float_class_inf:
				1286	b_exp = INT16_MAX;
				1287	break;
				1288	case float_class_zero:
				1289	b_exp = INT16_MIN;
				1290	break;
				1291	default:
				1292	g_assert_not_reached();
				1293	break;
				1294	}
				1295	}
				1296
				1297	/* Compare magnitudes. */
				1298	cmp = a_exp - b_exp;
				1299	if (cmp == 0) {
				1300	cmp = frac_cmp(a, b);
				1301	}
				1302
				1303	/*
				1304	* Take the sign into account.
				1305	* For ismag, only do this if the magnitudes are equal.
				1306	*/
				1307	if (!(flags & minmax_ismag) \|\| cmp == 0) {
				1308	if (a->sign != b->sign) {
				1309	/* For differing signs, the negative operand is less. */
				1310	cmp = a->sign ? -1 : 1;
				1311	} else if (a->sign) {
				1312	/* For two negative operands, invert the magnitude comparison. */
				1313	cmp = -cmp;
				1314	}
				1315	}
				1316
				1317	if (flags & minmax_ismin) {
				1318	cmp = -cmp;
				1319	}
				1320	return cmp < 0 ? b : a;
				1321	}
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1322
				1323	/*
				1324	* Floating point compare
				1325	*/
				1326	static FloatRelation partsN(compare)(FloatPartsN a, FloatPartsN b,
				1327	float_status *s, bool is_quiet)
				1328	{
				1329	int ab_mask = float_cmask(a->cls) \| float_cmask(b->cls);
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1330
				1331	if (likely(ab_mask == float_cmask_normal)) {
Richard Henderson	9343c88	2022-04-01 07:22:39 -0600	[diff] [blame]	1332	FloatRelation cmp;
				1333
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1334	if (a->sign != b->sign) {
				1335	goto a_sign;
				1336	}
Richard Henderson	9343c88	2022-04-01 07:22:39 -0600	[diff] [blame]	1337	if (a->exp == b->exp) {
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1338	cmp = frac_cmp(a, b);
Richard Henderson	9343c88	2022-04-01 07:22:39 -0600	[diff] [blame]	1339	} else if (a->exp < b->exp) {
				1340	cmp = float_relation_less;
				1341	} else {
				1342	cmp = float_relation_greater;
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1343	}
				1344	if (a->sign) {
				1345	cmp = -cmp;
				1346	}
				1347	return cmp;
				1348	}
				1349
				1350	if (unlikely(ab_mask & float_cmask_anynan)) {
Richard Henderson	e706d44	2021-12-17 17:57:14 +0100	[diff] [blame]	1351	if (ab_mask & float_cmask_snan) {
				1352	float_raise(float_flag_invalid \| float_flag_invalid_snan, s);
				1353	} else if (!is_quiet) {
Richard Henderson	6eb169b	2020-11-14 19:20:36 -0800	[diff] [blame]	1354	float_raise(float_flag_invalid, s);
				1355	}
				1356	return float_relation_unordered;
				1357	}
				1358
				1359	if (ab_mask & float_cmask_zero) {
				1360	if (ab_mask == float_cmask_zero) {
				1361	return float_relation_equal;
				1362	} else if (a->cls == float_class_zero) {
				1363	goto b_sign;
				1364	} else {
				1365	goto a_sign;
				1366	}
				1367	}
				1368
				1369	if (ab_mask == float_cmask_inf) {
				1370	if (a->sign == b->sign) {
				1371	return float_relation_equal;
				1372	}
				1373	} else if (b->cls == float_class_inf) {
				1374	goto b_sign;
				1375	} else {
				1376	g_assert(a->cls == float_class_inf);
				1377	}
				1378
				1379	a_sign:
				1380	return a->sign ? float_relation_less : float_relation_greater;
				1381	b_sign:
				1382	return b->sign ? float_relation_greater : float_relation_less;
				1383	}
Richard Henderson	39626b0	2020-11-14 20:28:02 -0800	[diff] [blame]	1384
				1385	/*
				1386	* Multiply A by 2 raised to the power N.
				1387	*/
				1388	static void partsN(scalbn)(FloatPartsN a, int n, float_status s)
				1389	{
				1390	switch (a->cls) {
				1391	case float_class_snan:
				1392	case float_class_qnan:
				1393	parts_return_nan(a, s);
				1394	break;
				1395	case float_class_zero:
				1396	case float_class_inf:
				1397	break;
				1398	case float_class_normal:
				1399	a->exp += MIN(MAX(n, -0x10000), 0x10000);
				1400	break;
				1401	default:
				1402	g_assert_not_reached();
				1403	}
				1404	}
Richard Henderson	2fa3546	2020-11-22 10:42:22 -0800	[diff] [blame]	1405
				1406	/*
				1407	* Return log2(A)
				1408	*/
				1409	static void partsN(log2)(FloatPartsN a, float_status s, const FloatFmt *fmt)
				1410	{
				1411	uint64_t a0, a1, r, t, ign;
				1412	FloatPartsN f;
				1413	int i, n, a_exp, f_exp;
				1414
				1415	if (unlikely(a->cls != float_class_normal)) {
				1416	switch (a->cls) {
				1417	case float_class_snan:
				1418	case float_class_qnan:
				1419	parts_return_nan(a, s);
				1420	return;
				1421	case float_class_zero:
				1422	/* log2(0) = -inf */
				1423	a->cls = float_class_inf;
				1424	a->sign = 1;
				1425	return;
				1426	case float_class_inf:
				1427	if (unlikely(a->sign)) {
				1428	goto d_nan;
				1429	}
				1430	return;
				1431	default:
				1432	break;
				1433	}
				1434	g_assert_not_reached();
				1435	}
				1436	if (unlikely(a->sign)) {
				1437	goto d_nan;
				1438	}
				1439
				1440	/* TODO: This algorithm looses bits too quickly for float128. */
				1441	g_assert(N == 64);
				1442
				1443	a_exp = a->exp;
				1444	f_exp = -1;
				1445
				1446	r = 0;
				1447	t = DECOMPOSED_IMPLICIT_BIT;
				1448	a0 = a->frac_hi;
				1449	a1 = 0;
				1450
				1451	n = fmt->frac_size + 2;
				1452	if (unlikely(a_exp == -1)) {
				1453	/*
				1454	* When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
				1455	* When the value is very close to 1.0, there are lots of 1's in
				1456	* the msb parts of the fraction. At the end, when we subtract
				1457	* this value from -1.0, we can see a catastrophic loss of precision,
				1458	* as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
				1459	* bits of y in the final result. To minimize this, compute as many
				1460	* digits as we can.
				1461	* ??? This case needs another algorithm to avoid this.
				1462	*/
				1463	n = fmt->frac_size * 2 + 2;
				1464	/* Don't compute a value overlapping the sticky bit */
				1465	n = MIN(n, 62);
				1466	}
				1467
				1468	for (i = 0; i < n; i++) {
				1469	if (a1) {
				1470	mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
				1471	} else if (a0 & 0xffffffffull) {
				1472	mul64To128(a0, a0, &a0, &a1);
				1473	} else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
				1474	a0 >>= 32;
				1475	a0 *= a0;
				1476	} else {
				1477	goto exact;
				1478	}
				1479
				1480	if (a0 & DECOMPOSED_IMPLICIT_BIT) {
				1481	if (unlikely(a_exp == 0 && r == 0)) {
				1482	/*
				1483	* When a_exp == 0, we're computing the log2 of a value
				1484	* [1.0,2.0). When the value is very close to 1.0, there
				1485	* are lots of 0's in the msb parts of the fraction.
				1486	* We need to compute more digits to produce a correct
				1487	* result -- restart at the top of the fraction.
				1488	* ??? This is likely to lose precision quickly, as for
				1489	* float128; we may need another method.
				1490	*/
				1491	f_exp -= i;
				1492	t = r = DECOMPOSED_IMPLICIT_BIT;
				1493	i = 0;
				1494	} else {
				1495	r \|= t;
				1496	}
				1497	} else {
				1498	add128(a0, a1, a0, a1, &a0, &a1);
				1499	}
				1500	t >>= 1;
				1501	}
				1502
				1503	/* Set sticky for inexact. */
				1504	r \|= (a1 \|\| a0 & ~DECOMPOSED_IMPLICIT_BIT);
				1505
				1506	exact:
				1507	parts_sint_to_float(a, a_exp, 0, s);
				1508	if (r == 0) {
				1509	return;
				1510	}
				1511
				1512	memset(&f, 0, sizeof(f));
				1513	f.cls = float_class_normal;
				1514	f.frac_hi = r;
				1515	f.exp = f_exp - frac_normalize(&f);
				1516
				1517	if (a_exp < 0) {
				1518	parts_sub_normal(a, &f);
				1519	} else if (a_exp > 0) {
				1520	parts_add_normal(a, &f);
				1521	} else {
				1522	*a = f;
				1523	}
				1524	return;
				1525
				1526	d_nan:
				1527	float_raise(float_flag_invalid, s);
				1528	parts_default_nan(a, s);
				1529	}