Blame - fpu/softfloat.c - qemu

blob: 9274ebf1011cd369645eeacccbe02de49116864a [file] [log] [blame]

Andreas Färber	8d725fa	2011-03-07 01:34:04 +0100	[diff] [blame]	1	/*
				2	* QEMU float support
				3	*
				4	* Derived from SoftFloat.
				5	*/
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6
				7	/*============================================================================
				8
				9	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				10	Package, Release 2b.
				11
				12	Written by John R. Hauser. This work was made possible in part by the
				13	International Computer Science Institute, located at Suite 600, 1947 Center
				14	Street, Berkeley, California 94704. Funding was partially provided by the
				15	National Science Foundation under grant MIP-9311980. The original version
				16	of this code was written as part of a project to build a fixed-point vector
				17	processor in collaboration with the University of California at Berkeley,
				18	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				19	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				20	arithmetic/SoftFloat.html'.
				21
				22	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				23	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				24	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				25	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				26	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				27	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				28	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				29	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				30
				31	Derivative works are acceptable, even for commercial purposes, so long as
				32	(1) the source code for the derivative work includes prominent notice that
				33	the work is derivative, and (2) the source code includes prominent notice with
				34	these four paragraphs for those parts of this code that are retained.
				35
				36	=============================================================================*/
				37
Peter Maydell	2ac8bd0	2011-09-26 16:56:55 +0100	[diff] [blame]	38	/* softfloat (and in particular the code in softfloat-specialize.h) is
				39	* target-dependent and needs the TARGET_* macros.
				40	*/
				41	#include "config.h"
				42
Paolo Bonzini	6b4c305	2012-10-24 13:12:00 +0200	[diff] [blame]	43	#include "fpu/softfloat.h"
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	44
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	45	/* We only need stdlib for abort() */
				46	#include <stdlib.h>
				47
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	48	/*----------------------------------------------------------------------------
				49	\| Primitive arithmetic functions, including multi-word arithmetic, and
				50	\| division and square root approximations. (Can be specialized to target if
				51	\| desired.)
				52	----------------------------------------------------------------------------/
				53	#include "softfloat-macros.h"
				54
				55	/*----------------------------------------------------------------------------
				56	\| Functions and definitions to determine: (1) whether tininess for underflow
				57	\| is detected before or after rounding by default, (2) what (if anything)
				58	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				59	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				60	\| are propagated from function inputs to output. These details are target-
				61	\| specific.
				62	----------------------------------------------------------------------------/
				63	#include "softfloat-specialize.h"
				64
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	65	/*----------------------------------------------------------------------------
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	66	\| Returns the fraction bits of the half-precision floating-point value `a'.
				67	----------------------------------------------------------------------------/
				68
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	69	static inline uint32_t extractFloat16Frac(float16 a)
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	70	{
				71	return float16_val(a) & 0x3ff;
				72	}
				73
				74	/*----------------------------------------------------------------------------
				75	\| Returns the exponent bits of the half-precision floating-point value `a'.
				76	----------------------------------------------------------------------------/
				77
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	78	static inline int_fast16_t extractFloat16Exp(float16 a)
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	79	{
				80	return (float16_val(a) >> 10) & 0x1f;
				81	}
				82
				83	/*----------------------------------------------------------------------------
				84	\| Returns the sign bit of the single-precision floating-point value `a'.
				85	----------------------------------------------------------------------------/
				86
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	87	static inline flag extractFloat16Sign(float16 a)
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	88	{
				89	return float16_val(a)>>15;
				90	}
				91
				92	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	93	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				94	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				95	\| input. If `zSign' is 1, the input is negated before being converted to an
				96	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				97	\| is simply rounded to an integer, with the inexact exception raised if the
				98	\| input cannot be represented exactly as an integer. However, if the fixed-
				99	\| point input is too large, the invalid exception is raised and the largest
				100	\| positive or negative integer is returned.
				101	----------------------------------------------------------------------------/
				102
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	103	static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	104	{
				105	int8 roundingMode;
				106	flag roundNearestEven;
				107	int8 roundIncrement, roundBits;
Peter Maydell	760e141	2012-04-05 19:12:35 +0100	[diff] [blame]	108	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	109
				110	roundingMode = STATUS(float_rounding_mode);
				111	roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	112	switch (roundingMode) {
				113	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	114	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	115	roundIncrement = 0x40;
				116	break;
				117	case float_round_to_zero:
				118	roundIncrement = 0;
				119	break;
				120	case float_round_up:
				121	roundIncrement = zSign ? 0 : 0x7f;
				122	break;
				123	case float_round_down:
				124	roundIncrement = zSign ? 0x7f : 0;
				125	break;
				126	default:
				127	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	128	}
				129	roundBits = absZ & 0x7F;
				130	absZ = ( absZ + roundIncrement )>>7;
				131	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				132	z = absZ;
				133	if ( zSign ) z = - z;
				134	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				135	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	136	return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	137	}
				138	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				139	return z;
				140
				141	}
				142
				143	/*----------------------------------------------------------------------------
				144	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				145	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				146	\| and returns the properly rounded 64-bit integer corresponding to the input.
				147	\| If `zSign' is 1, the input is negated before being converted to an integer.
				148	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				149	\| the inexact exception raised if the input cannot be represented exactly as
				150	\| an integer. However, if the fixed-point input is too large, the invalid
				151	\| exception is raised and the largest positive or negative integer is
				152	\| returned.
				153	----------------------------------------------------------------------------/
				154
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	155	static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	156	{
				157	int8 roundingMode;
				158	flag roundNearestEven, increment;
Peter Maydell	760e141	2012-04-05 19:12:35 +0100	[diff] [blame]	159	int64_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	160
				161	roundingMode = STATUS(float_rounding_mode);
				162	roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	163	switch (roundingMode) {
				164	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	165	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	166	increment = ((int64_t) absZ1 < 0);
				167	break;
				168	case float_round_to_zero:
				169	increment = 0;
				170	break;
				171	case float_round_up:
				172	increment = !zSign && absZ1;
				173	break;
				174	case float_round_down:
				175	increment = zSign && absZ1;
				176	break;
				177	default:
				178	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	179	}
				180	if ( increment ) {
				181	++absZ0;
				182	if ( absZ0 == 0 ) goto overflow;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	183	absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	184	}
				185	z = absZ0;
				186	if ( zSign ) z = - z;
				187	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				188	overflow:
				189	float_raise( float_flag_invalid STATUS_VAR);
				190	return
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	191	zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	192	: LIT64( 0x7FFFFFFFFFFFFFFF );
				193	}
				194	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				195	return z;
				196
				197	}
				198
				199	/*----------------------------------------------------------------------------
Tom Musta	fb3ea83	2014-01-07 17:17:49 +0000	[diff] [blame]	200	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				201	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				202	\| and returns the properly rounded 64-bit unsigned integer corresponding to the
				203	\| input. Ordinarily, the fixed-point input is simply rounded to an integer,
				204	\| with the inexact exception raised if the input cannot be represented exactly
				205	\| as an integer. However, if the fixed-point input is too large, the invalid
				206	\| exception is raised and the largest unsigned integer is returned.
				207	----------------------------------------------------------------------------/
				208
				209	static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
				210	uint64_t absZ1 STATUS_PARAM)
				211	{
				212	int8 roundingMode;
				213	flag roundNearestEven, increment;
				214
				215	roundingMode = STATUS(float_rounding_mode);
				216	roundNearestEven = (roundingMode == float_round_nearest_even);
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	217	switch (roundingMode) {
				218	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	219	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	220	increment = ((int64_t)absZ1 < 0);
				221	break;
				222	case float_round_to_zero:
				223	increment = 0;
				224	break;
				225	case float_round_up:
				226	increment = !zSign && absZ1;
				227	break;
				228	case float_round_down:
				229	increment = zSign && absZ1;
				230	break;
				231	default:
				232	abort();
Tom Musta	fb3ea83	2014-01-07 17:17:49 +0000	[diff] [blame]	233	}
				234	if (increment) {
				235	++absZ0;
				236	if (absZ0 == 0) {
				237	float_raise(float_flag_invalid STATUS_VAR);
				238	return LIT64(0xFFFFFFFFFFFFFFFF);
				239	}
				240	absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
				241	}
				242
				243	if (zSign && absZ0) {
				244	float_raise(float_flag_invalid STATUS_VAR);
				245	return 0;
				246	}
				247
				248	if (absZ1) {
				249	STATUS(float_exception_flags) \|= float_flag_inexact;
				250	}
				251	return absZ0;
				252	}
				253
				254	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	255	\| Returns the fraction bits of the single-precision floating-point value `a'.
				256	----------------------------------------------------------------------------/
				257
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	258	static inline uint32_t extractFloat32Frac( float32 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	259	{
				260
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	261	return float32_val(a) & 0x007FFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	262
				263	}
				264
				265	/*----------------------------------------------------------------------------
				266	\| Returns the exponent bits of the single-precision floating-point value `a'.
				267	----------------------------------------------------------------------------/
				268
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	269	static inline int_fast16_t extractFloat32Exp(float32 a)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	270	{
				271
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	272	return ( float32_val(a)>>23 ) & 0xFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	273
				274	}
				275
				276	/*----------------------------------------------------------------------------
				277	\| Returns the sign bit of the single-precision floating-point value `a'.
				278	----------------------------------------------------------------------------/
				279
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	280	static inline flag extractFloat32Sign( float32 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	281	{
				282
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	283	return float32_val(a)>>31;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	284
				285	}
				286
				287	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	288	\| If `a' is denormal and we are in flush-to-zero mode then set the
				289	\| input-denormal exception and return zero. Otherwise just return the value.
				290	----------------------------------------------------------------------------/
Alex Bennée	7baeabc	2014-03-17 16:31:51 +0000	[diff] [blame]	291	float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	292	{
				293	if (STATUS(flush_inputs_to_zero)) {
				294	if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
				295	float_raise(float_flag_input_denormal STATUS_VAR);
				296	return make_float32(float32_val(a) & 0x80000000);
				297	}
				298	}
				299	return a;
				300	}
				301
				302	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	303	\| Normalizes the subnormal single-precision floating-point value represented
				304	\| by the denormalized significand `aSig'. The normalized exponent and
				305	\| significand are stored at the locations pointed to by `zExpPtr' and
				306	\| `zSigPtr', respectively.
				307	----------------------------------------------------------------------------/
				308
				309	static void
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	310	normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t zExpPtr, uint32_t zSigPtr)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	311	{
				312	int8 shiftCount;
				313
				314	shiftCount = countLeadingZeros32( aSig ) - 8;
				315	*zSigPtr = aSig<<shiftCount;
				316	*zExpPtr = 1 - shiftCount;
				317
				318	}
				319
				320	/*----------------------------------------------------------------------------
				321	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				322	\| single-precision floating-point value, returning the result. After being
				323	\| shifted into the proper positions, the three fields are simply added
				324	\| together to form the result. This means that any integer portion of `zSig'
				325	\| will be added into the exponent. Since a properly normalized significand
				326	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				327	\| than the desired result exponent whenever `zSig' is a complete, normalized
				328	\| significand.
				329	----------------------------------------------------------------------------/
				330
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	331	static inline float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	332	{
				333
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	334	return make_float32(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	335	( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	336
				337	}
				338
				339	/*----------------------------------------------------------------------------
				340	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				341	\| and significand `zSig', and returns the proper single-precision floating-
				342	\| point value corresponding to the abstract input. Ordinarily, the abstract
				343	\| value is simply rounded and packed into the single-precision format, with
				344	\| the inexact exception raised if the abstract input cannot be represented
				345	\| exactly. However, if the abstract value is too large, the overflow and
				346	\| inexact exceptions are raised and an infinity or maximal finite value is
				347	\| returned. If the abstract value is too small, the input value is rounded to
				348	\| a subnormal number, and the underflow and inexact exceptions are raised if
				349	\| the abstract input cannot be represented exactly as a subnormal single-
				350	\| precision floating-point number.
				351	\| The input significand `zSig' has its binary point between bits 30
				352	\| and 29, which is 7 bits to the left of the usual location. This shifted
				353	\| significand must be normalized or smaller. If `zSig' is not normalized,
				354	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				355	\| and it must not require rounding. In the usual case that `zSig' is
				356	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				357	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				358	\| Binary Floating-Point Arithmetic.
				359	----------------------------------------------------------------------------/
				360
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	361	static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	362	{
				363	int8 roundingMode;
				364	flag roundNearestEven;
				365	int8 roundIncrement, roundBits;
				366	flag isTiny;
				367
				368	roundingMode = STATUS(float_rounding_mode);
				369	roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	370	switch (roundingMode) {
				371	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	372	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	373	roundIncrement = 0x40;
				374	break;
				375	case float_round_to_zero:
				376	roundIncrement = 0;
				377	break;
				378	case float_round_up:
				379	roundIncrement = zSign ? 0 : 0x7f;
				380	break;
				381	case float_round_down:
				382	roundIncrement = zSign ? 0x7f : 0;
				383	break;
				384	default:
				385	abort();
				386	break;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	387	}
				388	roundBits = zSig & 0x7F;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	389	if ( 0xFD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	390	if ( ( 0xFD < zExp )
				391	\|\| ( ( zExp == 0xFD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	392	&& ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	393	) {
				394	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	395	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	396	}
				397	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	398	if (STATUS(flush_to_zero)) {
				399	float_raise(float_flag_output_denormal STATUS_VAR);
				400	return packFloat32(zSign, 0, 0);
				401	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	402	isTiny =
				403	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				404	\|\| ( zExp < -1 )
				405	\|\| ( zSig + roundIncrement < 0x80000000 );
				406	shift32RightJamming( zSig, - zExp, &zSig );
				407	zExp = 0;
				408	roundBits = zSig & 0x7F;
				409	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				410	}
				411	}
				412	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				413	zSig = ( zSig + roundIncrement )>>7;
				414	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				415	if ( zSig == 0 ) zExp = 0;
				416	return packFloat32( zSign, zExp, zSig );
				417
				418	}
				419
				420	/*----------------------------------------------------------------------------
				421	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				422	\| and significand `zSig', and returns the proper single-precision floating-
				423	\| point value corresponding to the abstract input. This routine is just like
				424	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				425	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				426	\| floating-point exponent.
				427	----------------------------------------------------------------------------/
				428
				429	static float32
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	430	normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	431	{
				432	int8 shiftCount;
				433
				434	shiftCount = countLeadingZeros32( zSig ) - 1;
				435	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				436
				437	}
				438
				439	/*----------------------------------------------------------------------------
				440	\| Returns the fraction bits of the double-precision floating-point value `a'.
				441	----------------------------------------------------------------------------/
				442
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	443	static inline uint64_t extractFloat64Frac( float64 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	444	{
				445
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	446	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	447
				448	}
				449
				450	/*----------------------------------------------------------------------------
				451	\| Returns the exponent bits of the double-precision floating-point value `a'.
				452	----------------------------------------------------------------------------/
				453
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	454	static inline int_fast16_t extractFloat64Exp(float64 a)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	455	{
				456
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	457	return ( float64_val(a)>>52 ) & 0x7FF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	458
				459	}
				460
				461	/*----------------------------------------------------------------------------
				462	\| Returns the sign bit of the double-precision floating-point value `a'.
				463	----------------------------------------------------------------------------/
				464
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	465	static inline flag extractFloat64Sign( float64 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	466	{
				467
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	468	return float64_val(a)>>63;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	469
				470	}
				471
				472	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	473	\| If `a' is denormal and we are in flush-to-zero mode then set the
				474	\| input-denormal exception and return zero. Otherwise just return the value.
				475	----------------------------------------------------------------------------/
Alex Bennée	7baeabc	2014-03-17 16:31:51 +0000	[diff] [blame]	476	float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	477	{
				478	if (STATUS(flush_inputs_to_zero)) {
				479	if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
				480	float_raise(float_flag_input_denormal STATUS_VAR);
				481	return make_float64(float64_val(a) & (1ULL << 63));
				482	}
				483	}
				484	return a;
				485	}
				486
				487	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	488	\| Normalizes the subnormal double-precision floating-point value represented
				489	\| by the denormalized significand `aSig'. The normalized exponent and
				490	\| significand are stored at the locations pointed to by `zExpPtr' and
				491	\| `zSigPtr', respectively.
				492	----------------------------------------------------------------------------/
				493
				494	static void
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	495	normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t zExpPtr, uint64_t zSigPtr)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	496	{
				497	int8 shiftCount;
				498
				499	shiftCount = countLeadingZeros64( aSig ) - 11;
				500	*zSigPtr = aSig<<shiftCount;
				501	*zExpPtr = 1 - shiftCount;
				502
				503	}
				504
				505	/*----------------------------------------------------------------------------
				506	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				507	\| double-precision floating-point value, returning the result. After being
				508	\| shifted into the proper positions, the three fields are simply added
				509	\| together to form the result. This means that any integer portion of `zSig'
				510	\| will be added into the exponent. Since a properly normalized significand
				511	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				512	\| than the desired result exponent whenever `zSig' is a complete, normalized
				513	\| significand.
				514	----------------------------------------------------------------------------/
				515
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	516	static inline float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	517	{
				518
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	519	return make_float64(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	520	( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	521
				522	}
				523
				524	/*----------------------------------------------------------------------------
				525	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				526	\| and significand `zSig', and returns the proper double-precision floating-
				527	\| point value corresponding to the abstract input. Ordinarily, the abstract
				528	\| value is simply rounded and packed into the double-precision format, with
				529	\| the inexact exception raised if the abstract input cannot be represented
				530	\| exactly. However, if the abstract value is too large, the overflow and
				531	\| inexact exceptions are raised and an infinity or maximal finite value is
				532	\| returned. If the abstract value is too small, the input value is rounded
				533	\| to a subnormal number, and the underflow and inexact exceptions are raised
				534	\| if the abstract input cannot be represented exactly as a subnormal double-
				535	\| precision floating-point number.
				536	\| The input significand `zSig' has its binary point between bits 62
				537	\| and 61, which is 10 bits to the left of the usual location. This shifted
				538	\| significand must be normalized or smaller. If `zSig' is not normalized,
				539	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				540	\| and it must not require rounding. In the usual case that `zSig' is
				541	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				542	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				543	\| Binary Floating-Point Arithmetic.
				544	----------------------------------------------------------------------------/
				545
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	546	static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	547	{
				548	int8 roundingMode;
				549	flag roundNearestEven;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	550	int_fast16_t roundIncrement, roundBits;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	551	flag isTiny;
				552
				553	roundingMode = STATUS(float_rounding_mode);
				554	roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	555	switch (roundingMode) {
				556	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	557	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	558	roundIncrement = 0x200;
				559	break;
				560	case float_round_to_zero:
				561	roundIncrement = 0;
				562	break;
				563	case float_round_up:
				564	roundIncrement = zSign ? 0 : 0x3ff;
				565	break;
				566	case float_round_down:
				567	roundIncrement = zSign ? 0x3ff : 0;
				568	break;
				569	default:
				570	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	571	}
				572	roundBits = zSig & 0x3FF;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	573	if ( 0x7FD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	574	if ( ( 0x7FD < zExp )
				575	\|\| ( ( zExp == 0x7FD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	576	&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	577	) {
				578	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	579	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	580	}
				581	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	582	if (STATUS(flush_to_zero)) {
				583	float_raise(float_flag_output_denormal STATUS_VAR);
				584	return packFloat64(zSign, 0, 0);
				585	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	586	isTiny =
				587	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				588	\|\| ( zExp < -1 )
				589	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				590	shift64RightJamming( zSig, - zExp, &zSig );
				591	zExp = 0;
				592	roundBits = zSig & 0x3FF;
				593	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				594	}
				595	}
				596	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				597	zSig = ( zSig + roundIncrement )>>10;
				598	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				599	if ( zSig == 0 ) zExp = 0;
				600	return packFloat64( zSign, zExp, zSig );
				601
				602	}
				603
				604	/*----------------------------------------------------------------------------
				605	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				606	\| and significand `zSig', and returns the proper double-precision floating-
				607	\| point value corresponding to the abstract input. This routine is just like
				608	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				609	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				610	\| floating-point exponent.
				611	----------------------------------------------------------------------------/
				612
				613	static float64
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	614	normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	615	{
				616	int8 shiftCount;
				617
				618	shiftCount = countLeadingZeros64( zSig ) - 1;
				619	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				620
				621	}
				622
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	623	/*----------------------------------------------------------------------------
				624	\| Returns the fraction bits of the extended double-precision floating-point
				625	\| value `a'.
				626	----------------------------------------------------------------------------/
				627
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	628	static inline uint64_t extractFloatx80Frac( floatx80 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	629	{
				630
				631	return a.low;
				632
				633	}
				634
				635	/*----------------------------------------------------------------------------
				636	\| Returns the exponent bits of the extended double-precision floating-point
				637	\| value `a'.
				638	----------------------------------------------------------------------------/
				639
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	640	static inline int32 extractFloatx80Exp( floatx80 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	641	{
				642
				643	return a.high & 0x7FFF;
				644
				645	}
				646
				647	/*----------------------------------------------------------------------------
				648	\| Returns the sign bit of the extended double-precision floating-point value
				649	\| `a'.
				650	----------------------------------------------------------------------------/
				651
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	652	static inline flag extractFloatx80Sign( floatx80 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	653	{
				654
				655	return a.high>>15;
				656
				657	}
				658
				659	/*----------------------------------------------------------------------------
				660	\| Normalizes the subnormal extended double-precision floating-point value
				661	\| represented by the denormalized significand `aSig'. The normalized exponent
				662	\| and significand are stored at the locations pointed to by `zExpPtr' and
				663	\| `zSigPtr', respectively.
				664	----------------------------------------------------------------------------/
				665
				666	static void
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	667	normalizeFloatx80Subnormal( uint64_t aSig, int32 zExpPtr, uint64_t zSigPtr )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	668	{
				669	int8 shiftCount;
				670
				671	shiftCount = countLeadingZeros64( aSig );
				672	*zSigPtr = aSig<<shiftCount;
				673	*zExpPtr = 1 - shiftCount;
				674
				675	}
				676
				677	/*----------------------------------------------------------------------------
				678	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				679	\| extended double-precision floating-point value, returning the result.
				680	----------------------------------------------------------------------------/
				681
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	682	static inline floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	683	{
				684	floatx80 z;
				685
				686	z.low = zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	687	z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	688	return z;
				689
				690	}
				691
				692	/*----------------------------------------------------------------------------
				693	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				694	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				695	\| and returns the proper extended double-precision floating-point value
				696	\| corresponding to the abstract input. Ordinarily, the abstract value is
				697	\| rounded and packed into the extended double-precision format, with the
				698	\| inexact exception raised if the abstract input cannot be represented
				699	\| exactly. However, if the abstract value is too large, the overflow and
				700	\| inexact exceptions are raised and an infinity or maximal finite value is
				701	\| returned. If the abstract value is too small, the input value is rounded to
				702	\| a subnormal number, and the underflow and inexact exceptions are raised if
				703	\| the abstract input cannot be represented exactly as a subnormal extended
				704	\| double-precision floating-point number.
				705	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				706	\| number of bits as single or double precision, respectively. Otherwise, the
				707	\| result is rounded to the full precision of the extended double-precision
				708	\| format.
				709	\| The input significand must be normalized or smaller. If the input
				710	\| significand is not normalized, `zExp' must be 0; in that case, the result
				711	\| returned is a subnormal number, and it must not require rounding. The
				712	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				713	\| Floating-Point Arithmetic.
				714	----------------------------------------------------------------------------/
				715
				716	static floatx80
				717	roundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	718	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	719	STATUS_PARAM)
				720	{
				721	int8 roundingMode;
				722	flag roundNearestEven, increment, isTiny;
				723	int64 roundIncrement, roundMask, roundBits;
				724
				725	roundingMode = STATUS(float_rounding_mode);
				726	roundNearestEven = ( roundingMode == float_round_nearest_even );
				727	if ( roundingPrecision == 80 ) goto precision80;
				728	if ( roundingPrecision == 64 ) {
				729	roundIncrement = LIT64( 0x0000000000000400 );
				730	roundMask = LIT64( 0x00000000000007FF );
				731	}
				732	else if ( roundingPrecision == 32 ) {
				733	roundIncrement = LIT64( 0x0000008000000000 );
				734	roundMask = LIT64( 0x000000FFFFFFFFFF );
				735	}
				736	else {
				737	goto precision80;
				738	}
				739	zSig0 \|= ( zSig1 != 0 );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	740	switch (roundingMode) {
				741	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	742	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	743	break;
				744	case float_round_to_zero:
				745	roundIncrement = 0;
				746	break;
				747	case float_round_up:
				748	roundIncrement = zSign ? 0 : roundMask;
				749	break;
				750	case float_round_down:
				751	roundIncrement = zSign ? roundMask : 0;
				752	break;
				753	default:
				754	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	755	}
				756	roundBits = zSig0 & roundMask;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	757	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	758	if ( ( 0x7FFE < zExp )
				759	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				760	) {
				761	goto overflow;
				762	}
				763	if ( zExp <= 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	764	if (STATUS(flush_to_zero)) {
				765	float_raise(float_flag_output_denormal STATUS_VAR);
				766	return packFloatx80(zSign, 0, 0);
				767	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	768	isTiny =
				769	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				770	\|\| ( zExp < 0 )
				771	\|\| ( zSig0 <= zSig0 + roundIncrement );
				772	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				773	zExp = 0;
				774	roundBits = zSig0 & roundMask;
				775	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				776	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				777	zSig0 += roundIncrement;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	778	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	779	roundIncrement = roundMask + 1;
				780	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				781	roundMask \|= roundIncrement;
				782	}
				783	zSig0 &= ~ roundMask;
				784	return packFloatx80( zSign, zExp, zSig0 );
				785	}
				786	}
				787	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				788	zSig0 += roundIncrement;
				789	if ( zSig0 < roundIncrement ) {
				790	++zExp;
				791	zSig0 = LIT64( 0x8000000000000000 );
				792	}
				793	roundIncrement = roundMask + 1;
				794	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				795	roundMask \|= roundIncrement;
				796	}
				797	zSig0 &= ~ roundMask;
				798	if ( zSig0 == 0 ) zExp = 0;
				799	return packFloatx80( zSign, zExp, zSig0 );
				800	precision80:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	801	switch (roundingMode) {
				802	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	803	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	804	increment = ((int64_t)zSig1 < 0);
				805	break;
				806	case float_round_to_zero:
				807	increment = 0;
				808	break;
				809	case float_round_up:
				810	increment = !zSign && zSig1;
				811	break;
				812	case float_round_down:
				813	increment = zSign && zSig1;
				814	break;
				815	default:
				816	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	817	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	818	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	819	if ( ( 0x7FFE < zExp )
				820	\|\| ( ( zExp == 0x7FFE )
				821	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				822	&& increment
				823	)
				824	) {
				825	roundMask = 0;
				826	overflow:
				827	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				828	if ( ( roundingMode == float_round_to_zero )
				829	\|\| ( zSign && ( roundingMode == float_round_up ) )
				830	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				831	) {
				832	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				833	}
				834	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				835	}
				836	if ( zExp <= 0 ) {
				837	isTiny =
				838	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				839	\|\| ( zExp < 0 )
				840	\|\| ! increment
				841	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				842	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				843	zExp = 0;
				844	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				845	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	846	switch (roundingMode) {
				847	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	848	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	849	increment = ((int64_t)zSig1 < 0);
				850	break;
				851	case float_round_to_zero:
				852	increment = 0;
				853	break;
				854	case float_round_up:
				855	increment = !zSign && zSig1;
				856	break;
				857	case float_round_down:
				858	increment = zSign && zSig1;
				859	break;
				860	default:
				861	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	862	}
				863	if ( increment ) {
				864	++zSig0;
				865	zSig0 &=
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	866	~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				867	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	868	}
				869	return packFloatx80( zSign, zExp, zSig0 );
				870	}
				871	}
				872	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				873	if ( increment ) {
				874	++zSig0;
				875	if ( zSig0 == 0 ) {
				876	++zExp;
				877	zSig0 = LIT64( 0x8000000000000000 );
				878	}
				879	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	880	zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	881	}
				882	}
				883	else {
				884	if ( zSig0 == 0 ) zExp = 0;
				885	}
				886	return packFloatx80( zSign, zExp, zSig0 );
				887
				888	}
				889
				890	/*----------------------------------------------------------------------------
				891	\| Takes an abstract floating-point value having sign `zSign', exponent
				892	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				893	\| and returns the proper extended double-precision floating-point value
				894	\| corresponding to the abstract input. This routine is just like
				895	\| `roundAndPackFloatx80' except that the input significand does not have to be
				896	\| normalized.
				897	----------------------------------------------------------------------------/
				898
				899	static floatx80
				900	normalizeRoundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	901	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	902	STATUS_PARAM)
				903	{
				904	int8 shiftCount;
				905
				906	if ( zSig0 == 0 ) {
				907	zSig0 = zSig1;
				908	zSig1 = 0;
				909	zExp -= 64;
				910	}
				911	shiftCount = countLeadingZeros64( zSig0 );
				912	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				913	zExp -= shiftCount;
				914	return
				915	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				916
				917	}
				918
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	919	/*----------------------------------------------------------------------------
				920	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				921	\| floating-point value `a'.
				922	----------------------------------------------------------------------------/
				923
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	924	static inline uint64_t extractFloat128Frac1( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	925	{
				926
				927	return a.low;
				928
				929	}
				930
				931	/*----------------------------------------------------------------------------
				932	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				933	\| floating-point value `a'.
				934	----------------------------------------------------------------------------/
				935
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	936	static inline uint64_t extractFloat128Frac0( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	937	{
				938
				939	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				940
				941	}
				942
				943	/*----------------------------------------------------------------------------
				944	\| Returns the exponent bits of the quadruple-precision floating-point value
				945	\| `a'.
				946	----------------------------------------------------------------------------/
				947
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	948	static inline int32 extractFloat128Exp( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	949	{
				950
				951	return ( a.high>>48 ) & 0x7FFF;
				952
				953	}
				954
				955	/*----------------------------------------------------------------------------
				956	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				957	----------------------------------------------------------------------------/
				958
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	959	static inline flag extractFloat128Sign( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	960	{
				961
				962	return a.high>>63;
				963
				964	}
				965
				966	/*----------------------------------------------------------------------------
				967	\| Normalizes the subnormal quadruple-precision floating-point value
				968	\| represented by the denormalized significand formed by the concatenation of
				969	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				970	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				971	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				972	\| least significant 64 bits of the normalized significand are stored at the
				973	\| location pointed to by `zSig1Ptr'.
				974	----------------------------------------------------------------------------/
				975
				976	static void
				977	normalizeFloat128Subnormal(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	978	uint64_t aSig0,
				979	uint64_t aSig1,
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	980	int32 *zExpPtr,
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	981	uint64_t *zSig0Ptr,
				982	uint64_t *zSig1Ptr
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	983	)
				984	{
				985	int8 shiftCount;
				986
				987	if ( aSig0 == 0 ) {
				988	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				989	if ( shiftCount < 0 ) {
				990	*zSig0Ptr = aSig1>>( - shiftCount );
				991	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				992	}
				993	else {
				994	*zSig0Ptr = aSig1<<shiftCount;
				995	*zSig1Ptr = 0;
				996	}
				997	*zExpPtr = - shiftCount - 63;
				998	}
				999	else {
				1000	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				1001	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				1002	*zExpPtr = 1 - shiftCount;
				1003	}
				1004
				1005	}
				1006
				1007	/*----------------------------------------------------------------------------
				1008	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				1009	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				1010	\| floating-point value, returning the result. After being shifted into the
				1011	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				1012	\| added together to form the most significant 32 bits of the result. This
				1013	\| means that any integer portion of `zSig0' will be added into the exponent.
				1014	\| Since a properly normalized significand will have an integer portion equal
				1015	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				1016	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				1017	\| significand.
				1018	----------------------------------------------------------------------------/
				1019
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	1020	static inline float128
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1021	packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1022	{
				1023	float128 z;
				1024
				1025	z.low = zSig1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1026	z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1027	return z;
				1028
				1029	}
				1030
				1031	/*----------------------------------------------------------------------------
				1032	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1033	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				1034	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				1035	\| corresponding to the abstract input. Ordinarily, the abstract value is
				1036	\| simply rounded and packed into the quadruple-precision format, with the
				1037	\| inexact exception raised if the abstract input cannot be represented
				1038	\| exactly. However, if the abstract value is too large, the overflow and
				1039	\| inexact exceptions are raised and an infinity or maximal finite value is
				1040	\| returned. If the abstract value is too small, the input value is rounded to
				1041	\| a subnormal number, and the underflow and inexact exceptions are raised if
				1042	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				1043	\| precision floating-point number.
				1044	\| The input significand must be normalized or smaller. If the input
				1045	\| significand is not normalized, `zExp' must be 0; in that case, the result
				1046	\| returned is a subnormal number, and it must not require rounding. In the
				1047	\| usual case that the input significand is normalized, `zExp' must be 1 less
				1048	\| than the ``true'' floating-point exponent. The handling of underflow and
				1049	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1050	----------------------------------------------------------------------------/
				1051
				1052	static float128
				1053	roundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1054	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1055	{
				1056	int8 roundingMode;
				1057	flag roundNearestEven, increment, isTiny;
				1058
				1059	roundingMode = STATUS(float_rounding_mode);
				1060	roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1061	switch (roundingMode) {
				1062	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	1063	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1064	increment = ((int64_t)zSig2 < 0);
				1065	break;
				1066	case float_round_to_zero:
				1067	increment = 0;
				1068	break;
				1069	case float_round_up:
				1070	increment = !zSign && zSig2;
				1071	break;
				1072	case float_round_down:
				1073	increment = zSign && zSig2;
				1074	break;
				1075	default:
				1076	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1077	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1078	if ( 0x7FFD <= (uint32_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1079	if ( ( 0x7FFD < zExp )
				1080	\|\| ( ( zExp == 0x7FFD )
				1081	&& eq128(
				1082	LIT64( 0x0001FFFFFFFFFFFF ),
				1083	LIT64( 0xFFFFFFFFFFFFFFFF ),
				1084	zSig0,
				1085	zSig1
				1086	)
				1087	&& increment
				1088	)
				1089	) {
				1090	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				1091	if ( ( roundingMode == float_round_to_zero )
				1092	\|\| ( zSign && ( roundingMode == float_round_up ) )
				1093	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				1094	) {
				1095	return
				1096	packFloat128(
				1097	zSign,
				1098	0x7FFE,
				1099	LIT64( 0x0000FFFFFFFFFFFF ),
				1100	LIT64( 0xFFFFFFFFFFFFFFFF )
				1101	);
				1102	}
				1103	return packFloat128( zSign, 0x7FFF, 0, 0 );
				1104	}
				1105	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1106	if (STATUS(flush_to_zero)) {
				1107	float_raise(float_flag_output_denormal STATUS_VAR);
				1108	return packFloat128(zSign, 0, 0, 0);
				1109	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1110	isTiny =
				1111	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				1112	\|\| ( zExp < -1 )
				1113	\|\| ! increment
				1114	\|\| lt128(
				1115	zSig0,
				1116	zSig1,
				1117	LIT64( 0x0001FFFFFFFFFFFF ),
				1118	LIT64( 0xFFFFFFFFFFFFFFFF )
				1119	);
				1120	shift128ExtraRightJamming(
				1121	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				1122	zExp = 0;
				1123	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1124	switch (roundingMode) {
				1125	case float_round_nearest_even:
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	1126	case float_round_ties_away:
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1127	increment = ((int64_t)zSig2 < 0);
				1128	break;
				1129	case float_round_to_zero:
				1130	increment = 0;
				1131	break;
				1132	case float_round_up:
				1133	increment = !zSign && zSig2;
				1134	break;
				1135	case float_round_down:
				1136	increment = zSign && zSig2;
				1137	break;
				1138	default:
				1139	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1140	}
				1141	}
				1142	}
				1143	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1144	if ( increment ) {
				1145	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				1146	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				1147	}
				1148	else {
				1149	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1150	}
				1151	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1152
				1153	}
				1154
				1155	/*----------------------------------------------------------------------------
				1156	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1157	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1158	\| returns the proper quadruple-precision floating-point value corresponding
				1159	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1160	\| except that the input significand has fewer bits and does not have to be
				1161	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1162	\| point exponent.
				1163	----------------------------------------------------------------------------/
				1164
				1165	static float128
				1166	normalizeRoundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1167	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1168	{
				1169	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1170	uint64_t zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1171
				1172	if ( zSig0 == 0 ) {
				1173	zSig0 = zSig1;
				1174	zSig1 = 0;
				1175	zExp -= 64;
				1176	}
				1177	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1178	if ( 0 <= shiftCount ) {
				1179	zSig2 = 0;
				1180	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1181	}
				1182	else {
				1183	shift128ExtraRightJamming(
				1184	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1185	}
				1186	zExp -= shiftCount;
				1187	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1188
				1189	}
				1190
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1191	/*----------------------------------------------------------------------------
				1192	\| Returns the result of converting the 32-bit two's complement integer `a'
				1193	\| to the single-precision floating-point format. The conversion is performed
				1194	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1195	----------------------------------------------------------------------------/
				1196
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1197	float32 int32_to_float32(int32_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1198	{
				1199	flag zSign;
				1200
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1201	if ( a == 0 ) return float32_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1202	if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1203	zSign = ( a < 0 );
				1204	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1205
				1206	}
				1207
				1208	/*----------------------------------------------------------------------------
				1209	\| Returns the result of converting the 32-bit two's complement integer `a'
				1210	\| to the double-precision floating-point format. The conversion is performed
				1211	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1212	----------------------------------------------------------------------------/
				1213
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1214	float64 int32_to_float64(int32_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1215	{
				1216	flag zSign;
				1217	uint32 absA;
				1218	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1219	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1220
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1221	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1222	zSign = ( a < 0 );
				1223	absA = zSign ? - a : a;
				1224	shiftCount = countLeadingZeros32( absA ) + 21;
				1225	zSig = absA;
				1226	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1227
				1228	}
				1229
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1230	/*----------------------------------------------------------------------------
				1231	\| Returns the result of converting the 32-bit two's complement integer `a'
				1232	\| to the extended double-precision floating-point format. The conversion
				1233	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1234	\| Arithmetic.
				1235	----------------------------------------------------------------------------/
				1236
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1237	floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1238	{
				1239	flag zSign;
				1240	uint32 absA;
				1241	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1242	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1243
				1244	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1245	zSign = ( a < 0 );
				1246	absA = zSign ? - a : a;
				1247	shiftCount = countLeadingZeros32( absA ) + 32;
				1248	zSig = absA;
				1249	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1250
				1251	}
				1252
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1253	/*----------------------------------------------------------------------------
				1254	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1255	\| the quadruple-precision floating-point format. The conversion is performed
				1256	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1257	----------------------------------------------------------------------------/
				1258
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1259	float128 int32_to_float128(int32_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1260	{
				1261	flag zSign;
				1262	uint32 absA;
				1263	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1264	uint64_t zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1265
				1266	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1267	zSign = ( a < 0 );
				1268	absA = zSign ? - a : a;
				1269	shiftCount = countLeadingZeros32( absA ) + 17;
				1270	zSig0 = absA;
				1271	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1272
				1273	}
				1274
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1275	/*----------------------------------------------------------------------------
				1276	\| Returns the result of converting the 64-bit two's complement integer `a'
				1277	\| to the single-precision floating-point format. The conversion is performed
				1278	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1279	----------------------------------------------------------------------------/
				1280
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1281	float32 int64_to_float32(int64_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1282	{
				1283	flag zSign;
				1284	uint64 absA;
				1285	int8 shiftCount;
				1286
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1287	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1288	zSign = ( a < 0 );
				1289	absA = zSign ? - a : a;
				1290	shiftCount = countLeadingZeros64( absA ) - 40;
				1291	if ( 0 <= shiftCount ) {
				1292	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1293	}
				1294	else {
				1295	shiftCount += 7;
				1296	if ( shiftCount < 0 ) {
				1297	shift64RightJamming( absA, - shiftCount, &absA );
				1298	}
				1299	else {
				1300	absA <<= shiftCount;
				1301	}
				1302	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1303	}
				1304
				1305	}
				1306
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1307	float32 uint64_to_float32(uint64_t a STATUS_PARAM)
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1308	{
				1309	int8 shiftCount;
				1310
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1311	if ( a == 0 ) return float32_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1312	shiftCount = countLeadingZeros64( a ) - 40;
				1313	if ( 0 <= shiftCount ) {
Peter Maydell	e744c06	2012-09-28 16:17:03 +0100	[diff] [blame]	1314	return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1315	}
				1316	else {
				1317	shiftCount += 7;
				1318	if ( shiftCount < 0 ) {
				1319	shift64RightJamming( a, - shiftCount, &a );
				1320	}
				1321	else {
				1322	a <<= shiftCount;
				1323	}
Peter Maydell	e744c06	2012-09-28 16:17:03 +0100	[diff] [blame]	1324	return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1325	}
				1326	}
				1327
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1328	/*----------------------------------------------------------------------------
				1329	\| Returns the result of converting the 64-bit two's complement integer `a'
				1330	\| to the double-precision floating-point format. The conversion is performed
				1331	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1332	----------------------------------------------------------------------------/
				1333
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1334	float64 int64_to_float64(int64_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1335	{
				1336	flag zSign;
				1337
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1338	if ( a == 0 ) return float64_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1339	if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1340	return packFloat64( 1, 0x43E, 0 );
				1341	}
				1342	zSign = ( a < 0 );
				1343	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1344
				1345	}
				1346
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1347	float64 uint64_to_float64(uint64_t a STATUS_PARAM)
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1348	{
Richard Henderson	17ed229	2012-12-31 10:09:03 -0800	[diff] [blame]	1349	int exp = 0x43C;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1350
Richard Henderson	17ed229	2012-12-31 10:09:03 -0800	[diff] [blame]	1351	if (a == 0) {
				1352	return float64_zero;
				1353	}
				1354	if ((int64_t)a < 0) {
				1355	shift64RightJamming(a, 1, &a);
				1356	exp += 1;
				1357	}
				1358	return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1359	}
				1360
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1361	/*----------------------------------------------------------------------------
				1362	\| Returns the result of converting the 64-bit two's complement integer `a'
				1363	\| to the extended double-precision floating-point format. The conversion
				1364	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1365	\| Arithmetic.
				1366	----------------------------------------------------------------------------/
				1367
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1368	floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1369	{
				1370	flag zSign;
				1371	uint64 absA;
				1372	int8 shiftCount;
				1373
				1374	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1375	zSign = ( a < 0 );
				1376	absA = zSign ? - a : a;
				1377	shiftCount = countLeadingZeros64( absA );
				1378	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1379
				1380	}
				1381
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1382	/*----------------------------------------------------------------------------
				1383	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1384	\| the quadruple-precision floating-point format. The conversion is performed
				1385	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1386	----------------------------------------------------------------------------/
				1387
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1388	float128 int64_to_float128(int64_t a STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1389	{
				1390	flag zSign;
				1391	uint64 absA;
				1392	int8 shiftCount;
				1393	int32 zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1394	uint64_t zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1395
				1396	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1397	zSign = ( a < 0 );
				1398	absA = zSign ? - a : a;
				1399	shiftCount = countLeadingZeros64( absA ) + 49;
				1400	zExp = 0x406E - shiftCount;
				1401	if ( 64 <= shiftCount ) {
				1402	zSig1 = 0;
				1403	zSig0 = absA;
				1404	shiftCount -= 64;
				1405	}
				1406	else {
				1407	zSig1 = absA;
				1408	zSig0 = 0;
				1409	}
				1410	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1411	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1412
				1413	}
				1414
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	1415	float128 uint64_to_float128(uint64_t a STATUS_PARAM)
Richard Henderson	1e397ea	2012-12-31 10:09:04 -0800	[diff] [blame]	1416	{
				1417	if (a == 0) {
				1418	return float128_zero;
				1419	}
				1420	return normalizeRoundAndPackFloat128(0, 0x406E, a, 0 STATUS_VAR);
				1421	}
				1422
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1423	/*----------------------------------------------------------------------------
				1424	\| Returns the result of converting the single-precision floating-point value
				1425	\| `a' to the 32-bit two's complement integer format. The conversion is
				1426	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1427	\| Arithmetic---which means in particular that the conversion is rounded
				1428	\| according to the current rounding mode. If `a' is a NaN, the largest
				1429	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1430	\| largest integer with the same sign as `a' is returned.
				1431	----------------------------------------------------------------------------/
				1432
				1433	int32 float32_to_int32( float32 a STATUS_PARAM )
				1434	{
				1435	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1436	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1437	uint32_t aSig;
				1438	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1439
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1440	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1441	aSig = extractFloat32Frac( a );
				1442	aExp = extractFloat32Exp( a );
				1443	aSign = extractFloat32Sign( a );
				1444	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1445	if ( aExp ) aSig \|= 0x00800000;
				1446	shiftCount = 0xAF - aExp;
				1447	aSig64 = aSig;
				1448	aSig64 <<= 32;
				1449	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1450	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1451
				1452	}
				1453
				1454	/*----------------------------------------------------------------------------
				1455	\| Returns the result of converting the single-precision floating-point value
				1456	\| `a' to the 32-bit two's complement integer format. The conversion is
				1457	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1458	\| Arithmetic, except that the conversion is always rounded toward zero.
				1459	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1460	\| the conversion overflows, the largest integer with the same sign as `a' is
				1461	\| returned.
				1462	----------------------------------------------------------------------------/
				1463
				1464	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1465	{
				1466	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1467	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1468	uint32_t aSig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	1469	int32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1470	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1471
				1472	aSig = extractFloat32Frac( a );
				1473	aExp = extractFloat32Exp( a );
				1474	aSign = extractFloat32Sign( a );
				1475	shiftCount = aExp - 0x9E;
				1476	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1477	if ( float32_val(a) != 0xCF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1478	float_raise( float_flag_invalid STATUS_VAR);
				1479	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1480	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1481	return (int32_t) 0x80000000;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1482	}
				1483	else if ( aExp <= 0x7E ) {
				1484	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1485	return 0;
				1486	}
				1487	aSig = ( aSig \| 0x00800000 )<<8;
				1488	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1489	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1490	STATUS(float_exception_flags) \|= float_flag_inexact;
				1491	}
				1492	if ( aSign ) z = - z;
				1493	return z;
				1494
				1495	}
				1496
				1497	/*----------------------------------------------------------------------------
				1498	\| Returns the result of converting the single-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1499	\| `a' to the 16-bit two's complement integer format. The conversion is
				1500	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1501	\| Arithmetic, except that the conversion is always rounded toward zero.
				1502	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1503	\| the conversion overflows, the largest integer with the same sign as `a' is
				1504	\| returned.
				1505	----------------------------------------------------------------------------/
				1506
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1507	int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1508	{
				1509	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1510	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1511	uint32_t aSig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1512	int32 z;
				1513
				1514	aSig = extractFloat32Frac( a );
				1515	aExp = extractFloat32Exp( a );
				1516	aSign = extractFloat32Sign( a );
				1517	shiftCount = aExp - 0x8E;
				1518	if ( 0 <= shiftCount ) {
				1519	if ( float32_val(a) != 0xC7000000 ) {
				1520	float_raise( float_flag_invalid STATUS_VAR);
				1521	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1522	return 0x7FFF;
				1523	}
				1524	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1525	return (int32_t) 0xffff8000;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1526	}
				1527	else if ( aExp <= 0x7E ) {
				1528	if ( aExp \| aSig ) {
				1529	STATUS(float_exception_flags) \|= float_flag_inexact;
				1530	}
				1531	return 0;
				1532	}
				1533	shiftCount -= 0x10;
				1534	aSig = ( aSig \| 0x00800000 )<<8;
				1535	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1536	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1537	STATUS(float_exception_flags) \|= float_flag_inexact;
				1538	}
				1539	if ( aSign ) {
				1540	z = - z;
				1541	}
				1542	return z;
				1543
				1544	}
				1545
				1546	/*----------------------------------------------------------------------------
				1547	\| Returns the result of converting the single-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1548	\| `a' to the 64-bit two's complement integer format. The conversion is
				1549	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1550	\| Arithmetic---which means in particular that the conversion is rounded
				1551	\| according to the current rounding mode. If `a' is a NaN, the largest
				1552	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1553	\| largest integer with the same sign as `a' is returned.
				1554	----------------------------------------------------------------------------/
				1555
				1556	int64 float32_to_int64( float32 a STATUS_PARAM )
				1557	{
				1558	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1559	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1560	uint32_t aSig;
				1561	uint64_t aSig64, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1562	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1563
				1564	aSig = extractFloat32Frac( a );
				1565	aExp = extractFloat32Exp( a );
				1566	aSign = extractFloat32Sign( a );
				1567	shiftCount = 0xBE - aExp;
				1568	if ( shiftCount < 0 ) {
				1569	float_raise( float_flag_invalid STATUS_VAR);
				1570	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1571	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1572	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1573	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1574	}
				1575	if ( aExp ) aSig \|= 0x00800000;
				1576	aSig64 = aSig;
				1577	aSig64 <<= 40;
				1578	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1579	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1580
				1581	}
				1582
				1583	/*----------------------------------------------------------------------------
				1584	\| Returns the result of converting the single-precision floating-point value
Tom Musta	2f18bbf	2014-01-07 17:17:50 +0000	[diff] [blame]	1585	\| `a' to the 64-bit unsigned integer format. The conversion is
				1586	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1587	\| Arithmetic---which means in particular that the conversion is rounded
				1588	\| according to the current rounding mode. If `a' is a NaN, the largest
				1589	\| unsigned integer is returned. Otherwise, if the conversion overflows, the
				1590	\| largest unsigned integer is returned. If the 'a' is negative, the result
				1591	\| is rounded and zero is returned; values that do not round to zero will
				1592	\| raise the inexact exception flag.
				1593	----------------------------------------------------------------------------/
				1594
				1595	uint64 float32_to_uint64(float32 a STATUS_PARAM)
				1596	{
				1597	flag aSign;
				1598	int_fast16_t aExp, shiftCount;
				1599	uint32_t aSig;
				1600	uint64_t aSig64, aSigExtra;
				1601	a = float32_squash_input_denormal(a STATUS_VAR);
				1602
				1603	aSig = extractFloat32Frac(a);
				1604	aExp = extractFloat32Exp(a);
				1605	aSign = extractFloat32Sign(a);
				1606	if ((aSign) && (aExp > 126)) {
				1607	float_raise(float_flag_invalid STATUS_VAR);
				1608	if (float32_is_any_nan(a)) {
				1609	return LIT64(0xFFFFFFFFFFFFFFFF);
				1610	} else {
				1611	return 0;
				1612	}
				1613	}
				1614	shiftCount = 0xBE - aExp;
				1615	if (aExp) {
				1616	aSig \|= 0x00800000;
				1617	}
				1618	if (shiftCount < 0) {
				1619	float_raise(float_flag_invalid STATUS_VAR);
				1620	return LIT64(0xFFFFFFFFFFFFFFFF);
				1621	}
				1622
				1623	aSig64 = aSig;
				1624	aSig64 <<= 40;
				1625	shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
				1626	return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
				1627	}
				1628
				1629	/*----------------------------------------------------------------------------
				1630	\| Returns the result of converting the single-precision floating-point value
Tom Musta	a13d448	2014-03-31 16:03:55 -0500	[diff] [blame]	1631	\| `a' to the 64-bit unsigned integer format. The conversion is
				1632	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1633	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1634	\| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the
				1635	\| conversion overflows, the largest unsigned integer is returned. If the
				1636	\| 'a' is negative, the result is rounded and zero is returned; values that do
				1637	\| not round to zero will raise the inexact flag.
				1638	----------------------------------------------------------------------------/
				1639
				1640	uint64 float32_to_uint64_round_to_zero(float32 a STATUS_PARAM)
				1641	{
				1642	signed char current_rounding_mode = STATUS(float_rounding_mode);
				1643	set_float_rounding_mode(float_round_to_zero STATUS_VAR);
				1644	int64_t v = float32_to_uint64(a STATUS_VAR);
				1645	set_float_rounding_mode(current_rounding_mode STATUS_VAR);
				1646	return v;
				1647	}
				1648
				1649	/*----------------------------------------------------------------------------
				1650	\| Returns the result of converting the single-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1651	\| `a' to the 64-bit two's complement integer format. The conversion is
				1652	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1653	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1654	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1655	\| conversion overflows, the largest integer with the same sign as `a' is
				1656	\| returned.
				1657	----------------------------------------------------------------------------/
				1658
				1659	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1660	{
				1661	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1662	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1663	uint32_t aSig;
				1664	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1665	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1666	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1667
				1668	aSig = extractFloat32Frac( a );
				1669	aExp = extractFloat32Exp( a );
				1670	aSign = extractFloat32Sign( a );
				1671	shiftCount = aExp - 0xBE;
				1672	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1673	if ( float32_val(a) != 0xDF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1674	float_raise( float_flag_invalid STATUS_VAR);
				1675	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1676	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1677	}
				1678	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1679	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1680	}
				1681	else if ( aExp <= 0x7E ) {
				1682	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1683	return 0;
				1684	}
				1685	aSig64 = aSig \| 0x00800000;
				1686	aSig64 <<= 40;
				1687	z = aSig64>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1688	if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1689	STATUS(float_exception_flags) \|= float_flag_inexact;
				1690	}
				1691	if ( aSign ) z = - z;
				1692	return z;
				1693
				1694	}
				1695
				1696	/*----------------------------------------------------------------------------
				1697	\| Returns the result of converting the single-precision floating-point value
				1698	\| `a' to the double-precision floating-point format. The conversion is
				1699	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1700	\| Arithmetic.
				1701	----------------------------------------------------------------------------/
				1702
				1703	float64 float32_to_float64( float32 a STATUS_PARAM )
				1704	{
				1705	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1706	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1707	uint32_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1708	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1709
				1710	aSig = extractFloat32Frac( a );
				1711	aExp = extractFloat32Exp( a );
				1712	aSign = extractFloat32Sign( a );
				1713	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1714	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1715	return packFloat64( aSign, 0x7FF, 0 );
				1716	}
				1717	if ( aExp == 0 ) {
				1718	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1719	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1720	--aExp;
				1721	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1722	return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1723
				1724	}
				1725
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1726	/*----------------------------------------------------------------------------
				1727	\| Returns the result of converting the single-precision floating-point value
				1728	\| `a' to the extended double-precision floating-point format. The conversion
				1729	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1730	\| Arithmetic.
				1731	----------------------------------------------------------------------------/
				1732
				1733	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1734	{
				1735	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1736	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1737	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1738
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1739	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1740	aSig = extractFloat32Frac( a );
				1741	aExp = extractFloat32Exp( a );
				1742	aSign = extractFloat32Sign( a );
				1743	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1744	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1745	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1746	}
				1747	if ( aExp == 0 ) {
				1748	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1749	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1750	}
				1751	aSig \|= 0x00800000;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1752	return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1753
				1754	}
				1755
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1756	/*----------------------------------------------------------------------------
				1757	\| Returns the result of converting the single-precision floating-point value
				1758	\| `a' to the double-precision floating-point format. The conversion is
				1759	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1760	\| Arithmetic.
				1761	----------------------------------------------------------------------------/
				1762
				1763	float128 float32_to_float128( float32 a STATUS_PARAM )
				1764	{
				1765	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1766	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1767	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1768
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1769	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1770	aSig = extractFloat32Frac( a );
				1771	aExp = extractFloat32Exp( a );
				1772	aSign = extractFloat32Sign( a );
				1773	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1774	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1775	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1776	}
				1777	if ( aExp == 0 ) {
				1778	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1779	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1780	--aExp;
				1781	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1782	return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1783
				1784	}
				1785
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1786	/*----------------------------------------------------------------------------
				1787	\| Rounds the single-precision floating-point value `a' to an integer, and
				1788	\| returns the result as a single-precision floating-point value. The
				1789	\| operation is performed according to the IEC/IEEE Standard for Binary
				1790	\| Floating-Point Arithmetic.
				1791	----------------------------------------------------------------------------/
				1792
				1793	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1794	{
				1795	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1796	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1797	uint32_t lastBitMask, roundBitsMask;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1798	uint32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1799	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1800
				1801	aExp = extractFloat32Exp( a );
				1802	if ( 0x96 <= aExp ) {
				1803	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1804	return propagateFloat32NaN( a, a STATUS_VAR );
				1805	}
				1806	return a;
				1807	}
				1808	if ( aExp <= 0x7E ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1809	if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1810	STATUS(float_exception_flags) \|= float_flag_inexact;
				1811	aSign = extractFloat32Sign( a );
				1812	switch ( STATUS(float_rounding_mode) ) {
				1813	case float_round_nearest_even:
				1814	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1815	return packFloat32( aSign, 0x7F, 0 );
				1816	}
				1817	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	1818	case float_round_ties_away:
				1819	if (aExp == 0x7E) {
				1820	return packFloat32(aSign, 0x7F, 0);
				1821	}
				1822	break;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1823	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1824	return make_float32(aSign ? 0xBF800000 : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1825	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1826	return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1827	}
				1828	return packFloat32( aSign, 0, 0 );
				1829	}
				1830	lastBitMask = 1;
				1831	lastBitMask <<= 0x96 - aExp;
				1832	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1833	z = float32_val(a);
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1834	switch (STATUS(float_rounding_mode)) {
				1835	case float_round_nearest_even:
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1836	z += lastBitMask>>1;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1837	if ((z & roundBitsMask) == 0) {
				1838	z &= ~lastBitMask;
				1839	}
				1840	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	1841	case float_round_ties_away:
				1842	z += lastBitMask >> 1;
				1843	break;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1844	case float_round_to_zero:
				1845	break;
				1846	case float_round_up:
				1847	if (!extractFloat32Sign(make_float32(z))) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1848	z += roundBitsMask;
				1849	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	1850	break;
				1851	case float_round_down:
				1852	if (extractFloat32Sign(make_float32(z))) {
				1853	z += roundBitsMask;
				1854	}
				1855	break;
				1856	default:
				1857	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1858	}
				1859	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1860	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1861	return make_float32(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1862
				1863	}
				1864
				1865	/*----------------------------------------------------------------------------
				1866	\| Returns the result of adding the absolute values of the single-precision
				1867	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1868	\| before being returned. `zSign' is ignored if the result is a NaN.
				1869	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1870	\| Floating-Point Arithmetic.
				1871	----------------------------------------------------------------------------/
				1872
				1873	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1874	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1875	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1876	uint32_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1877	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1878
				1879	aSig = extractFloat32Frac( a );
				1880	aExp = extractFloat32Exp( a );
				1881	bSig = extractFloat32Frac( b );
				1882	bExp = extractFloat32Exp( b );
				1883	expDiff = aExp - bExp;
				1884	aSig <<= 6;
				1885	bSig <<= 6;
				1886	if ( 0 < expDiff ) {
				1887	if ( aExp == 0xFF ) {
				1888	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1889	return a;
				1890	}
				1891	if ( bExp == 0 ) {
				1892	--expDiff;
				1893	}
				1894	else {
				1895	bSig \|= 0x20000000;
				1896	}
				1897	shift32RightJamming( bSig, expDiff, &bSig );
				1898	zExp = aExp;
				1899	}
				1900	else if ( expDiff < 0 ) {
				1901	if ( bExp == 0xFF ) {
				1902	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1903	return packFloat32( zSign, 0xFF, 0 );
				1904	}
				1905	if ( aExp == 0 ) {
				1906	++expDiff;
				1907	}
				1908	else {
				1909	aSig \|= 0x20000000;
				1910	}
				1911	shift32RightJamming( aSig, - expDiff, &aSig );
				1912	zExp = bExp;
				1913	}
				1914	else {
				1915	if ( aExp == 0xFF ) {
				1916	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1917	return a;
				1918	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1919	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1920	if (STATUS(flush_to_zero)) {
				1921	if (aSig \| bSig) {
				1922	float_raise(float_flag_output_denormal STATUS_VAR);
				1923	}
				1924	return packFloat32(zSign, 0, 0);
				1925	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1926	return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1927	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1928	zSig = 0x40000000 + aSig + bSig;
				1929	zExp = aExp;
				1930	goto roundAndPack;
				1931	}
				1932	aSig \|= 0x20000000;
				1933	zSig = ( aSig + bSig )<<1;
				1934	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1935	if ( (int32_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1936	zSig = aSig + bSig;
				1937	++zExp;
				1938	}
				1939	roundAndPack:
				1940	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1941
				1942	}
				1943
				1944	/*----------------------------------------------------------------------------
				1945	\| Returns the result of subtracting the absolute values of the single-
				1946	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1947	\| difference is negated before being returned. `zSign' is ignored if the
				1948	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1949	\| Standard for Binary Floating-Point Arithmetic.
				1950	----------------------------------------------------------------------------/
				1951
				1952	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1953	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1954	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1955	uint32_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	1956	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1957
				1958	aSig = extractFloat32Frac( a );
				1959	aExp = extractFloat32Exp( a );
				1960	bSig = extractFloat32Frac( b );
				1961	bExp = extractFloat32Exp( b );
				1962	expDiff = aExp - bExp;
				1963	aSig <<= 7;
				1964	bSig <<= 7;
				1965	if ( 0 < expDiff ) goto aExpBigger;
				1966	if ( expDiff < 0 ) goto bExpBigger;
				1967	if ( aExp == 0xFF ) {
				1968	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1969	float_raise( float_flag_invalid STATUS_VAR);
				1970	return float32_default_nan;
				1971	}
				1972	if ( aExp == 0 ) {
				1973	aExp = 1;
				1974	bExp = 1;
				1975	}
				1976	if ( bSig < aSig ) goto aBigger;
				1977	if ( aSig < bSig ) goto bBigger;
				1978	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1979	bExpBigger:
				1980	if ( bExp == 0xFF ) {
				1981	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1982	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1983	}
				1984	if ( aExp == 0 ) {
				1985	++expDiff;
				1986	}
				1987	else {
				1988	aSig \|= 0x40000000;
				1989	}
				1990	shift32RightJamming( aSig, - expDiff, &aSig );
				1991	bSig \|= 0x40000000;
				1992	bBigger:
				1993	zSig = bSig - aSig;
				1994	zExp = bExp;
				1995	zSign ^= 1;
				1996	goto normalizeRoundAndPack;
				1997	aExpBigger:
				1998	if ( aExp == 0xFF ) {
				1999	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2000	return a;
				2001	}
				2002	if ( bExp == 0 ) {
				2003	--expDiff;
				2004	}
				2005	else {
				2006	bSig \|= 0x40000000;
				2007	}
				2008	shift32RightJamming( bSig, expDiff, &bSig );
				2009	aSig \|= 0x40000000;
				2010	aBigger:
				2011	zSig = aSig - bSig;
				2012	zExp = aExp;
				2013	normalizeRoundAndPack:
				2014	--zExp;
				2015	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				2016
				2017	}
				2018
				2019	/*----------------------------------------------------------------------------
				2020	\| Returns the result of adding the single-precision floating-point values `a'
				2021	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				2022	\| Binary Floating-Point Arithmetic.
				2023	----------------------------------------------------------------------------/
				2024
				2025	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				2026	{
				2027	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2028	a = float32_squash_input_denormal(a STATUS_VAR);
				2029	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2030
				2031	aSign = extractFloat32Sign( a );
				2032	bSign = extractFloat32Sign( b );
				2033	if ( aSign == bSign ) {
				2034	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				2035	}
				2036	else {
				2037	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				2038	}
				2039
				2040	}
				2041
				2042	/*----------------------------------------------------------------------------
				2043	\| Returns the result of subtracting the single-precision floating-point values
				2044	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2045	\| for Binary Floating-Point Arithmetic.
				2046	----------------------------------------------------------------------------/
				2047
				2048	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				2049	{
				2050	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2051	a = float32_squash_input_denormal(a STATUS_VAR);
				2052	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2053
				2054	aSign = extractFloat32Sign( a );
				2055	bSign = extractFloat32Sign( b );
				2056	if ( aSign == bSign ) {
				2057	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				2058	}
				2059	else {
				2060	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				2061	}
				2062
				2063	}
				2064
				2065	/*----------------------------------------------------------------------------
				2066	\| Returns the result of multiplying the single-precision floating-point values
				2067	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2068	\| for Binary Floating-Point Arithmetic.
				2069	----------------------------------------------------------------------------/
				2070
				2071	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				2072	{
				2073	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2074	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2075	uint32_t aSig, bSig;
				2076	uint64_t zSig64;
				2077	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2078
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2079	a = float32_squash_input_denormal(a STATUS_VAR);
				2080	b = float32_squash_input_denormal(b STATUS_VAR);
				2081
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2082	aSig = extractFloat32Frac( a );
				2083	aExp = extractFloat32Exp( a );
				2084	aSign = extractFloat32Sign( a );
				2085	bSig = extractFloat32Frac( b );
				2086	bExp = extractFloat32Exp( b );
				2087	bSign = extractFloat32Sign( b );
				2088	zSign = aSign ^ bSign;
				2089	if ( aExp == 0xFF ) {
				2090	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				2091	return propagateFloat32NaN( a, b STATUS_VAR );
				2092	}
				2093	if ( ( bExp \| bSig ) == 0 ) {
				2094	float_raise( float_flag_invalid STATUS_VAR);
				2095	return float32_default_nan;
				2096	}
				2097	return packFloat32( zSign, 0xFF, 0 );
				2098	}
				2099	if ( bExp == 0xFF ) {
				2100	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2101	if ( ( aExp \| aSig ) == 0 ) {
				2102	float_raise( float_flag_invalid STATUS_VAR);
				2103	return float32_default_nan;
				2104	}
				2105	return packFloat32( zSign, 0xFF, 0 );
				2106	}
				2107	if ( aExp == 0 ) {
				2108	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				2109	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2110	}
				2111	if ( bExp == 0 ) {
				2112	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				2113	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2114	}
				2115	zExp = aExp + bExp - 0x7F;
				2116	aSig = ( aSig \| 0x00800000 )<<7;
				2117	bSig = ( bSig \| 0x00800000 )<<8;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2118	shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2119	zSig = zSig64;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2120	if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2121	zSig <<= 1;
				2122	--zExp;
				2123	}
				2124	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				2125
				2126	}
				2127
				2128	/*----------------------------------------------------------------------------
				2129	\| Returns the result of dividing the single-precision floating-point value `a'
				2130	\| by the corresponding value `b'. The operation is performed according to the
				2131	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2132	----------------------------------------------------------------------------/
				2133
				2134	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				2135	{
				2136	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2137	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2138	uint32_t aSig, bSig, zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2139	a = float32_squash_input_denormal(a STATUS_VAR);
				2140	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2141
				2142	aSig = extractFloat32Frac( a );
				2143	aExp = extractFloat32Exp( a );
				2144	aSign = extractFloat32Sign( a );
				2145	bSig = extractFloat32Frac( b );
				2146	bExp = extractFloat32Exp( b );
				2147	bSign = extractFloat32Sign( b );
				2148	zSign = aSign ^ bSign;
				2149	if ( aExp == 0xFF ) {
				2150	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2151	if ( bExp == 0xFF ) {
				2152	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2153	float_raise( float_flag_invalid STATUS_VAR);
				2154	return float32_default_nan;
				2155	}
				2156	return packFloat32( zSign, 0xFF, 0 );
				2157	}
				2158	if ( bExp == 0xFF ) {
				2159	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2160	return packFloat32( zSign, 0, 0 );
				2161	}
				2162	if ( bExp == 0 ) {
				2163	if ( bSig == 0 ) {
				2164	if ( ( aExp \| aSig ) == 0 ) {
				2165	float_raise( float_flag_invalid STATUS_VAR);
				2166	return float32_default_nan;
				2167	}
				2168	float_raise( float_flag_divbyzero STATUS_VAR);
				2169	return packFloat32( zSign, 0xFF, 0 );
				2170	}
				2171	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2172	}
				2173	if ( aExp == 0 ) {
				2174	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				2175	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2176	}
				2177	zExp = aExp - bExp + 0x7D;
				2178	aSig = ( aSig \| 0x00800000 )<<7;
				2179	bSig = ( bSig \| 0x00800000 )<<8;
				2180	if ( bSig <= ( aSig + aSig ) ) {
				2181	aSig >>= 1;
				2182	++zExp;
				2183	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2184	zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2185	if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2186	zSig \|= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2187	}
				2188	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				2189
				2190	}
				2191
				2192	/*----------------------------------------------------------------------------
				2193	\| Returns the remainder of the single-precision floating-point value `a'
				2194	\| with respect to the corresponding value `b'. The operation is performed
				2195	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2196	----------------------------------------------------------------------------/
				2197
				2198	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				2199	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	2200	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2201	int_fast16_t aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2202	uint32_t aSig, bSig;
				2203	uint32_t q;
				2204	uint64_t aSig64, bSig64, q64;
				2205	uint32_t alternateASig;
				2206	int32_t sigMean;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2207	a = float32_squash_input_denormal(a STATUS_VAR);
				2208	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2209
				2210	aSig = extractFloat32Frac( a );
				2211	aExp = extractFloat32Exp( a );
				2212	aSign = extractFloat32Sign( a );
				2213	bSig = extractFloat32Frac( b );
				2214	bExp = extractFloat32Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2215	if ( aExp == 0xFF ) {
				2216	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				2217	return propagateFloat32NaN( a, b STATUS_VAR );
				2218	}
				2219	float_raise( float_flag_invalid STATUS_VAR);
				2220	return float32_default_nan;
				2221	}
				2222	if ( bExp == 0xFF ) {
				2223	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2224	return a;
				2225	}
				2226	if ( bExp == 0 ) {
				2227	if ( bSig == 0 ) {
				2228	float_raise( float_flag_invalid STATUS_VAR);
				2229	return float32_default_nan;
				2230	}
				2231	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2232	}
				2233	if ( aExp == 0 ) {
				2234	if ( aSig == 0 ) return a;
				2235	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2236	}
				2237	expDiff = aExp - bExp;
				2238	aSig \|= 0x00800000;
				2239	bSig \|= 0x00800000;
				2240	if ( expDiff < 32 ) {
				2241	aSig <<= 8;
				2242	bSig <<= 8;
				2243	if ( expDiff < 0 ) {
				2244	if ( expDiff < -1 ) return a;
				2245	aSig >>= 1;
				2246	}
				2247	q = ( bSig <= aSig );
				2248	if ( q ) aSig -= bSig;
				2249	if ( 0 < expDiff ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2250	q = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2251	q >>= 32 - expDiff;
				2252	bSig >>= 2;
				2253	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2254	}
				2255	else {
				2256	aSig >>= 2;
				2257	bSig >>= 2;
				2258	}
				2259	}
				2260	else {
				2261	if ( bSig <= aSig ) aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2262	aSig64 = ( (uint64_t) aSig )<<40;
				2263	bSig64 = ( (uint64_t) bSig )<<40;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2264	expDiff -= 64;
				2265	while ( 0 < expDiff ) {
				2266	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2267	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2268	aSig64 = - ( ( bSig * q64 )<<38 );
				2269	expDiff -= 62;
				2270	}
				2271	expDiff += 64;
				2272	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2273	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2274	q = q64>>( 64 - expDiff );
				2275	bSig <<= 6;
				2276	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				2277	}
				2278	do {
				2279	alternateASig = aSig;
				2280	++q;
				2281	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2282	} while ( 0 <= (int32_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2283	sigMean = aSig + alternateASig;
				2284	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2285	aSig = alternateASig;
				2286	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2287	zSign = ( (int32_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2288	if ( zSign ) aSig = - aSig;
				2289	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2290
				2291	}
				2292
				2293	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2294	\| Returns the result of multiplying the single-precision floating-point values
				2295	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				2296	\| multiplication. The operation is performed according to the IEC/IEEE
				2297	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				2298	\| The flags argument allows the caller to select negation of the
				2299	\| addend, the intermediate product, or the final result. (The difference
				2300	\| between this and having the caller do a separate negation is that negating
				2301	\| externally will flip the sign bit on NaNs.)
				2302	----------------------------------------------------------------------------/
				2303
				2304	float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
				2305	{
				2306	flag aSign, bSign, cSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2307	int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2308	uint32_t aSig, bSig, cSig;
				2309	flag pInf, pZero, pSign;
				2310	uint64_t pSig64, cSig64, zSig64;
				2311	uint32_t pSig;
				2312	int shiftcount;
				2313	flag signflip, infzero;
				2314
				2315	a = float32_squash_input_denormal(a STATUS_VAR);
				2316	b = float32_squash_input_denormal(b STATUS_VAR);
				2317	c = float32_squash_input_denormal(c STATUS_VAR);
				2318	aSig = extractFloat32Frac(a);
				2319	aExp = extractFloat32Exp(a);
				2320	aSign = extractFloat32Sign(a);
				2321	bSig = extractFloat32Frac(b);
				2322	bExp = extractFloat32Exp(b);
				2323	bSign = extractFloat32Sign(b);
				2324	cSig = extractFloat32Frac(c);
				2325	cExp = extractFloat32Exp(c);
				2326	cSign = extractFloat32Sign(c);
				2327
				2328	infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) \|\|
				2329	(aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
				2330
				2331	/* It is implementation-defined whether the cases of (0,inf,qnan)
				2332	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				2333	* they return if they do), so we have to hand this information
				2334	* off to the target-specific pick-a-NaN routine.
				2335	*/
				2336	if (((aExp == 0xff) && aSig) \|\|
				2337	((bExp == 0xff) && bSig) \|\|
				2338	((cExp == 0xff) && cSig)) {
				2339	return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
				2340	}
				2341
				2342	if (infzero) {
				2343	float_raise(float_flag_invalid STATUS_VAR);
				2344	return float32_default_nan;
				2345	}
				2346
				2347	if (flags & float_muladd_negate_c) {
				2348	cSign ^= 1;
				2349	}
				2350
				2351	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				2352
				2353	/* Work out the sign and type of the product */
				2354	pSign = aSign ^ bSign;
				2355	if (flags & float_muladd_negate_product) {
				2356	pSign ^= 1;
				2357	}
				2358	pInf = (aExp == 0xff) \|\| (bExp == 0xff);
				2359	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				2360
				2361	if (cExp == 0xff) {
				2362	if (pInf && (pSign ^ cSign)) {
				2363	/* addition of opposite-signed infinities => InvalidOperation */
				2364	float_raise(float_flag_invalid STATUS_VAR);
				2365	return float32_default_nan;
				2366	}
				2367	/* Otherwise generate an infinity of the same sign */
				2368	return packFloat32(cSign ^ signflip, 0xff, 0);
				2369	}
				2370
				2371	if (pInf) {
				2372	return packFloat32(pSign ^ signflip, 0xff, 0);
				2373	}
				2374
				2375	if (pZero) {
				2376	if (cExp == 0) {
				2377	if (cSig == 0) {
				2378	/* Adding two exact zeroes */
				2379	if (pSign == cSign) {
				2380	zSign = pSign;
				2381	} else if (STATUS(float_rounding_mode) == float_round_down) {
				2382	zSign = 1;
				2383	} else {
				2384	zSign = 0;
				2385	}
				2386	return packFloat32(zSign ^ signflip, 0, 0);
				2387	}
				2388	/* Exact zero plus a denorm */
				2389	if (STATUS(flush_to_zero)) {
				2390	float_raise(float_flag_output_denormal STATUS_VAR);
				2391	return packFloat32(cSign ^ signflip, 0, 0);
				2392	}
				2393	}
				2394	/* Zero plus something non-zero : just return the something */
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	2395	if (flags & float_muladd_halve_result) {
				2396	if (cExp == 0) {
				2397	normalizeFloat32Subnormal(cSig, &cExp, &cSig);
				2398	}
				2399	/* Subtract one to halve, and one again because roundAndPackFloat32
				2400	* wants one less than the true exponent.
				2401	*/
				2402	cExp -= 2;
				2403	cSig = (cSig \| 0x00800000) << 7;
				2404	return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR);
				2405	}
Richard Sandiford	a6e7c18	2013-01-22 17:03:05 +0000	[diff] [blame]	2406	return packFloat32(cSign ^ signflip, cExp, cSig);
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2407	}
				2408
				2409	if (aExp == 0) {
				2410	normalizeFloat32Subnormal(aSig, &aExp, &aSig);
				2411	}
				2412	if (bExp == 0) {
				2413	normalizeFloat32Subnormal(bSig, &bExp, &bSig);
				2414	}
				2415
				2416	/* Calculate the actual result a * b + c */
				2417
				2418	/* Multiply first; this is easy. */
				2419	/* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
				2420	* because we want the true exponent, not the "one-less-than"
				2421	* flavour that roundAndPackFloat32() takes.
				2422	*/
				2423	pExp = aExp + bExp - 0x7e;
				2424	aSig = (aSig \| 0x00800000) << 7;
				2425	bSig = (bSig \| 0x00800000) << 8;
				2426	pSig64 = (uint64_t)aSig * bSig;
				2427	if ((int64_t)(pSig64 << 1) >= 0) {
				2428	pSig64 <<= 1;
				2429	pExp--;
				2430	}
				2431
				2432	zSign = pSign ^ signflip;
				2433
				2434	/* Now pSig64 is the significand of the multiply, with the explicit bit in
				2435	* position 62.
				2436	*/
				2437	if (cExp == 0) {
				2438	if (!cSig) {
				2439	/* Throw out the special case of c being an exact zero now */
				2440	shift64RightJamming(pSig64, 32, &pSig64);
				2441	pSig = pSig64;
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	2442	if (flags & float_muladd_halve_result) {
				2443	pExp--;
				2444	}
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2445	return roundAndPackFloat32(zSign, pExp - 1,
				2446	pSig STATUS_VAR);
				2447	}
				2448	normalizeFloat32Subnormal(cSig, &cExp, &cSig);
				2449	}
				2450
				2451	cSig64 = (uint64_t)cSig << (62 - 23);
				2452	cSig64 \|= LIT64(0x4000000000000000);
				2453	expDiff = pExp - cExp;
				2454
				2455	if (pSign == cSign) {
				2456	/* Addition */
				2457	if (expDiff > 0) {
				2458	/* scale c to match p */
				2459	shift64RightJamming(cSig64, expDiff, &cSig64);
				2460	zExp = pExp;
				2461	} else if (expDiff < 0) {
				2462	/* scale p to match c */
				2463	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2464	zExp = cExp;
				2465	} else {
				2466	/* no scaling needed */
				2467	zExp = cExp;
				2468	}
				2469	/* Add significands and make sure explicit bit ends up in posn 62 */
				2470	zSig64 = pSig64 + cSig64;
				2471	if ((int64_t)zSig64 < 0) {
				2472	shift64RightJamming(zSig64, 1, &zSig64);
				2473	} else {
				2474	zExp--;
				2475	}
				2476	} else {
				2477	/* Subtraction */
				2478	if (expDiff > 0) {
				2479	shift64RightJamming(cSig64, expDiff, &cSig64);
				2480	zSig64 = pSig64 - cSig64;
				2481	zExp = pExp;
				2482	} else if (expDiff < 0) {
				2483	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2484	zSig64 = cSig64 - pSig64;
				2485	zExp = cExp;
				2486	zSign ^= 1;
				2487	} else {
				2488	zExp = pExp;
				2489	if (cSig64 < pSig64) {
				2490	zSig64 = pSig64 - cSig64;
				2491	} else if (pSig64 < cSig64) {
				2492	zSig64 = cSig64 - pSig64;
				2493	zSign ^= 1;
				2494	} else {
				2495	/* Exact zero */
				2496	zSign = signflip;
				2497	if (STATUS(float_rounding_mode) == float_round_down) {
				2498	zSign ^= 1;
				2499	}
				2500	return packFloat32(zSign, 0, 0);
				2501	}
				2502	}
				2503	--zExp;
				2504	/* Normalize to put the explicit bit back into bit 62. */
				2505	shiftcount = countLeadingZeros64(zSig64) - 1;
				2506	zSig64 <<= shiftcount;
				2507	zExp -= shiftcount;
				2508	}
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	2509	if (flags & float_muladd_halve_result) {
				2510	zExp--;
				2511	}
				2512
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2513	shift64RightJamming(zSig64, 32, &zSig64);
				2514	return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
				2515	}
				2516
				2517
				2518	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2519	\| Returns the square root of the single-precision floating-point value `a'.
				2520	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2521	\| Floating-Point Arithmetic.
				2522	----------------------------------------------------------------------------/
				2523
				2524	float32 float32_sqrt( float32 a STATUS_PARAM )
				2525	{
				2526	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2527	int_fast16_t aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2528	uint32_t aSig, zSig;
				2529	uint64_t rem, term;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2530	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2531
				2532	aSig = extractFloat32Frac( a );
				2533	aExp = extractFloat32Exp( a );
				2534	aSign = extractFloat32Sign( a );
				2535	if ( aExp == 0xFF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2536	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2537	if ( ! aSign ) return a;
				2538	float_raise( float_flag_invalid STATUS_VAR);
				2539	return float32_default_nan;
				2540	}
				2541	if ( aSign ) {
				2542	if ( ( aExp \| aSig ) == 0 ) return a;
				2543	float_raise( float_flag_invalid STATUS_VAR);
				2544	return float32_default_nan;
				2545	}
				2546	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2547	if ( aSig == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2548	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2549	}
				2550	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2551	aSig = ( aSig \| 0x00800000 )<<8;
				2552	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2553	if ( ( zSig & 0x7F ) <= 5 ) {
				2554	if ( zSig < 2 ) {
				2555	zSig = 0x7FFFFFFF;
				2556	goto roundAndPack;
				2557	}
				2558	aSig >>= aExp & 1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2559	term = ( (uint64_t) zSig ) * zSig;
				2560	rem = ( ( (uint64_t) aSig )<<32 ) - term;
				2561	while ( (int64_t) rem < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2562	--zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2563	rem += ( ( (uint64_t) zSig )<<1 ) \| 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2564	}
				2565	zSig \|= ( rem != 0 );
				2566	}
				2567	shift32RightJamming( zSig, 1, &zSig );
				2568	roundAndPack:
				2569	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2570
				2571	}
				2572
				2573	/*----------------------------------------------------------------------------
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2574	\| Returns the binary exponential of the single-precision floating-point value
				2575	\| `a'. The operation is performed according to the IEC/IEEE Standard for
				2576	\| Binary Floating-Point Arithmetic.
				2577	\|
				2578	\| Uses the following identities:
				2579	\|
				2580	\| 1. -------------------------------------------------------------------------
				2581	\| x x*ln(2)
				2582	\| 2 = e
				2583	\|
				2584	\| 2. -------------------------------------------------------------------------
				2585	\| 2 3 4 5 n
				2586	\| x x x x x x x
				2587	\| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
				2588	\| 1! 2! 3! 4! 5! n!
				2589	----------------------------------------------------------------------------/
				2590
				2591	static const float64 float32_exp2_coefficients[15] =
				2592	{
Peter Maydell	d5138cf	2011-02-10 13:59:34 +0000	[diff] [blame]	2593	const_float64( 0x3ff0000000000000ll ), /* 1 */
				2594	const_float64( 0x3fe0000000000000ll ), /* 2 */
				2595	const_float64( 0x3fc5555555555555ll ), /* 3 */
				2596	const_float64( 0x3fa5555555555555ll ), /* 4 */
				2597	const_float64( 0x3f81111111111111ll ), /* 5 */
				2598	const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
				2599	const_float64( 0x3f2a01a01a01a01all ), /* 7 */
				2600	const_float64( 0x3efa01a01a01a01all ), /* 8 */
				2601	const_float64( 0x3ec71de3a556c734ll ), /* 9 */
				2602	const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
				2603	const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
				2604	const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
				2605	const_float64( 0x3de6124613a86d09ll ), /* 13 */
				2606	const_float64( 0x3da93974a8c07c9dll ), /* 14 */
				2607	const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2608	};
				2609
				2610	float32 float32_exp2( float32 a STATUS_PARAM )
				2611	{
				2612	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2613	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2614	uint32_t aSig;
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2615	float64 r, x, xn;
				2616	int i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2617	a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2618
				2619	aSig = extractFloat32Frac( a );
				2620	aExp = extractFloat32Exp( a );
				2621	aSign = extractFloat32Sign( a );
				2622
				2623	if ( aExp == 0xFF) {
				2624	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2625	return (aSign) ? float32_zero : a;
				2626	}
				2627	if (aExp == 0) {
				2628	if (aSig == 0) return float32_one;
				2629	}
				2630
				2631	float_raise( float_flag_inexact STATUS_VAR);
				2632
				2633	/* ******************************* */
				2634	/* using float64 for approximation */
				2635	/* ******************************* */
				2636	x = float32_to_float64(a STATUS_VAR);
				2637	x = float64_mul(x, float64_ln2 STATUS_VAR);
				2638
				2639	xn = x;
				2640	r = float64_one;
				2641	for (i = 0 ; i < 15 ; i++) {
				2642	float64 f;
				2643
				2644	f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
				2645	r = float64_add(r, f STATUS_VAR);
				2646
				2647	xn = float64_mul(xn, x STATUS_VAR);
				2648	}
				2649
				2650	return float64_to_float32(r, status);
				2651	}
				2652
				2653	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2654	\| Returns the binary log of the single-precision floating-point value `a'.
				2655	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2656	\| Floating-Point Arithmetic.
				2657	----------------------------------------------------------------------------/
				2658	float32 float32_log2( float32 a STATUS_PARAM )
				2659	{
				2660	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2661	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2662	uint32_t aSig, zSig, i;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2663
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2664	a = float32_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2665	aSig = extractFloat32Frac( a );
				2666	aExp = extractFloat32Exp( a );
				2667	aSign = extractFloat32Sign( a );
				2668
				2669	if ( aExp == 0 ) {
				2670	if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
				2671	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2672	}
				2673	if ( aSign ) {
				2674	float_raise( float_flag_invalid STATUS_VAR);
				2675	return float32_default_nan;
				2676	}
				2677	if ( aExp == 0xFF ) {
				2678	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2679	return a;
				2680	}
				2681
				2682	aExp -= 0x7F;
				2683	aSig \|= 0x00800000;
				2684	zSign = aExp < 0;
				2685	zSig = aExp << 23;
				2686
				2687	for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2688	aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2689	if ( aSig & 0x01000000 ) {
				2690	aSig >>= 1;
				2691	zSig \|= i;
				2692	}
				2693	}
				2694
				2695	if ( zSign )
				2696	zSig = -zSig;
				2697
				2698	return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
				2699	}
				2700
				2701	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2702	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2703	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2704	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2705	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2706	----------------------------------------------------------------------------/
				2707
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2708	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2709	{
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2710	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2711	a = float32_squash_input_denormal(a STATUS_VAR);
				2712	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2713
				2714	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2715	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2716	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2717	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2718	return 0;
				2719	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2720	av = float32_val(a);
				2721	bv = float32_val(b);
				2722	return ( av == bv ) \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2723	}
				2724
				2725	/*----------------------------------------------------------------------------
				2726	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2727	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				2728	\| exception is raised if either operand is a NaN. The comparison is performed
				2729	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2730	----------------------------------------------------------------------------/
				2731
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2732	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2733	{
				2734	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2735	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2736	a = float32_squash_input_denormal(a STATUS_VAR);
				2737	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2738
				2739	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2740	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2741	) {
				2742	float_raise( float_flag_invalid STATUS_VAR);
				2743	return 0;
				2744	}
				2745	aSign = extractFloat32Sign( a );
				2746	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2747	av = float32_val(a);
				2748	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2749	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2750	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2751
				2752	}
				2753
				2754	/*----------------------------------------------------------------------------
				2755	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2756	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2757	\| raised if either operand is a NaN. The comparison is performed according
				2758	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2759	----------------------------------------------------------------------------/
				2760
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2761	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2762	{
				2763	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2764	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2765	a = float32_squash_input_denormal(a STATUS_VAR);
				2766	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2767
				2768	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2769	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2770	) {
				2771	float_raise( float_flag_invalid STATUS_VAR);
				2772	return 0;
				2773	}
				2774	aSign = extractFloat32Sign( a );
				2775	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2776	av = float32_val(a);
				2777	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2778	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2779	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2780
				2781	}
				2782
				2783	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2784	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2785	\| be compared, and 0 otherwise. The invalid exception is raised if either
				2786	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				2787	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2788	----------------------------------------------------------------------------/
				2789
				2790	int float32_unordered( float32 a, float32 b STATUS_PARAM )
				2791	{
				2792	a = float32_squash_input_denormal(a STATUS_VAR);
				2793	b = float32_squash_input_denormal(b STATUS_VAR);
				2794
				2795	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2796	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2797	) {
				2798	float_raise( float_flag_invalid STATUS_VAR);
				2799	return 1;
				2800	}
				2801	return 0;
				2802	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2803
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2804	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2805	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2806	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2807	\| exception. The comparison is performed according to the IEC/IEEE Standard
				2808	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2809	----------------------------------------------------------------------------/
				2810
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2811	int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2812	{
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2813	a = float32_squash_input_denormal(a STATUS_VAR);
				2814	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2815
				2816	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2817	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2818	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2819	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2820	float_raise( float_flag_invalid STATUS_VAR);
				2821	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2822	return 0;
				2823	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2824	return ( float32_val(a) == float32_val(b) ) \|\|
				2825	( (uint32_t) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2826	}
				2827
				2828	/*----------------------------------------------------------------------------
				2829	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2830	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2831	\| cause an exception. Otherwise, the comparison is performed according to the
				2832	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2833	----------------------------------------------------------------------------/
				2834
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2835	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2836	{
				2837	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2838	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2839	a = float32_squash_input_denormal(a STATUS_VAR);
				2840	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2841
				2842	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2843	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2844	) {
				2845	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2846	float_raise( float_flag_invalid STATUS_VAR);
				2847	}
				2848	return 0;
				2849	}
				2850	aSign = extractFloat32Sign( a );
				2851	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2852	av = float32_val(a);
				2853	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2854	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2855	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2856
				2857	}
				2858
				2859	/*----------------------------------------------------------------------------
				2860	\| Returns 1 if the single-precision floating-point value `a' is less than
				2861	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2862	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2863	\| Standard for Binary Floating-Point Arithmetic.
				2864	----------------------------------------------------------------------------/
				2865
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2866	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2867	{
				2868	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2869	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2870	a = float32_squash_input_denormal(a STATUS_VAR);
				2871	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2872
				2873	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2874	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2875	) {
				2876	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2877	float_raise( float_flag_invalid STATUS_VAR);
				2878	}
				2879	return 0;
				2880	}
				2881	aSign = extractFloat32Sign( a );
				2882	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2883	av = float32_val(a);
				2884	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2885	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2886	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2887
				2888	}
				2889
				2890	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2891	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
				2892	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				2893	\| comparison is performed according to the IEC/IEEE Standard for Binary
				2894	\| Floating-Point Arithmetic.
				2895	----------------------------------------------------------------------------/
				2896
				2897	int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
				2898	{
				2899	a = float32_squash_input_denormal(a STATUS_VAR);
				2900	b = float32_squash_input_denormal(b STATUS_VAR);
				2901
				2902	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2903	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2904	) {
				2905	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2906	float_raise( float_flag_invalid STATUS_VAR);
				2907	}
				2908	return 1;
				2909	}
				2910	return 0;
				2911	}
				2912
				2913	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2914	\| Returns the result of converting the double-precision floating-point value
				2915	\| `a' to the 32-bit two's complement integer format. The conversion is
				2916	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2917	\| Arithmetic---which means in particular that the conversion is rounded
				2918	\| according to the current rounding mode. If `a' is a NaN, the largest
				2919	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2920	\| largest integer with the same sign as `a' is returned.
				2921	----------------------------------------------------------------------------/
				2922
				2923	int32 float64_to_int32( float64 a STATUS_PARAM )
				2924	{
				2925	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2926	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2927	uint64_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2928	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2929
				2930	aSig = extractFloat64Frac( a );
				2931	aExp = extractFloat64Exp( a );
				2932	aSign = extractFloat64Sign( a );
				2933	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2934	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2935	shiftCount = 0x42C - aExp;
				2936	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2937	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2938
				2939	}
				2940
				2941	/*----------------------------------------------------------------------------
				2942	\| Returns the result of converting the double-precision floating-point value
				2943	\| `a' to the 32-bit two's complement integer format. The conversion is
				2944	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2945	\| Arithmetic, except that the conversion is always rounded toward zero.
				2946	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2947	\| the conversion overflows, the largest integer with the same sign as `a' is
				2948	\| returned.
				2949	----------------------------------------------------------------------------/
				2950
				2951	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2952	{
				2953	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2954	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2955	uint64_t aSig, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	2956	int32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2957	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2958
				2959	aSig = extractFloat64Frac( a );
				2960	aExp = extractFloat64Exp( a );
				2961	aSign = extractFloat64Sign( a );
				2962	if ( 0x41E < aExp ) {
				2963	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2964	goto invalid;
				2965	}
				2966	else if ( aExp < 0x3FF ) {
				2967	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2968	return 0;
				2969	}
				2970	aSig \|= LIT64( 0x0010000000000000 );
				2971	shiftCount = 0x433 - aExp;
				2972	savedASig = aSig;
				2973	aSig >>= shiftCount;
				2974	z = aSig;
				2975	if ( aSign ) z = - z;
				2976	if ( ( z < 0 ) ^ aSign ) {
				2977	invalid:
				2978	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2979	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2980	}
				2981	if ( ( aSig<<shiftCount ) != savedASig ) {
				2982	STATUS(float_exception_flags) \|= float_flag_inexact;
				2983	}
				2984	return z;
				2985
				2986	}
				2987
				2988	/*----------------------------------------------------------------------------
				2989	\| Returns the result of converting the double-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2990	\| `a' to the 16-bit two's complement integer format. The conversion is
				2991	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2992	\| Arithmetic, except that the conversion is always rounded toward zero.
				2993	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2994	\| the conversion overflows, the largest integer with the same sign as `a' is
				2995	\| returned.
				2996	----------------------------------------------------------------------------/
				2997
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	2998	int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2999	{
				3000	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3001	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3002	uint64_t aSig, savedASig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	3003	int32 z;
				3004
				3005	aSig = extractFloat64Frac( a );
				3006	aExp = extractFloat64Exp( a );
				3007	aSign = extractFloat64Sign( a );
				3008	if ( 0x40E < aExp ) {
				3009	if ( ( aExp == 0x7FF ) && aSig ) {
				3010	aSign = 0;
				3011	}
				3012	goto invalid;
				3013	}
				3014	else if ( aExp < 0x3FF ) {
				3015	if ( aExp \|\| aSig ) {
				3016	STATUS(float_exception_flags) \|= float_flag_inexact;
				3017	}
				3018	return 0;
				3019	}
				3020	aSig \|= LIT64( 0x0010000000000000 );
				3021	shiftCount = 0x433 - aExp;
				3022	savedASig = aSig;
				3023	aSig >>= shiftCount;
				3024	z = aSig;
				3025	if ( aSign ) {
				3026	z = - z;
				3027	}
				3028	if ( ( (int16_t)z < 0 ) ^ aSign ) {
				3029	invalid:
				3030	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3031	return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	3032	}
				3033	if ( ( aSig<<shiftCount ) != savedASig ) {
				3034	STATUS(float_exception_flags) \|= float_flag_inexact;
				3035	}
				3036	return z;
				3037	}
				3038
				3039	/*----------------------------------------------------------------------------
				3040	\| Returns the result of converting the double-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3041	\| `a' to the 64-bit two's complement integer format. The conversion is
				3042	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3043	\| Arithmetic---which means in particular that the conversion is rounded
				3044	\| according to the current rounding mode. If `a' is a NaN, the largest
				3045	\| positive integer is returned. Otherwise, if the conversion overflows, the
				3046	\| largest integer with the same sign as `a' is returned.
				3047	----------------------------------------------------------------------------/
				3048
				3049	int64 float64_to_int64( float64 a STATUS_PARAM )
				3050	{
				3051	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3052	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3053	uint64_t aSig, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3054	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3055
				3056	aSig = extractFloat64Frac( a );
				3057	aExp = extractFloat64Exp( a );
				3058	aSign = extractFloat64Sign( a );
				3059	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				3060	shiftCount = 0x433 - aExp;
				3061	if ( shiftCount <= 0 ) {
				3062	if ( 0x43E < aExp ) {
				3063	float_raise( float_flag_invalid STATUS_VAR);
				3064	if ( ! aSign
				3065	\|\| ( ( aExp == 0x7FF )
				3066	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				3067	) {
				3068	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3069	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3070	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3071	}
				3072	aSigExtra = 0;
				3073	aSig <<= - shiftCount;
				3074	}
				3075	else {
				3076	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				3077	}
				3078	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				3079
				3080	}
				3081
				3082	/*----------------------------------------------------------------------------
				3083	\| Returns the result of converting the double-precision floating-point value
				3084	\| `a' to the 64-bit two's complement integer format. The conversion is
				3085	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3086	\| Arithmetic, except that the conversion is always rounded toward zero.
				3087	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				3088	\| the conversion overflows, the largest integer with the same sign as `a' is
				3089	\| returned.
				3090	----------------------------------------------------------------------------/
				3091
				3092	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				3093	{
				3094	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3095	int_fast16_t aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3096	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3097	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3098	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3099
				3100	aSig = extractFloat64Frac( a );
				3101	aExp = extractFloat64Exp( a );
				3102	aSign = extractFloat64Sign( a );
				3103	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				3104	shiftCount = aExp - 0x433;
				3105	if ( 0 <= shiftCount ) {
				3106	if ( 0x43E <= aExp ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3107	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3108	float_raise( float_flag_invalid STATUS_VAR);
				3109	if ( ! aSign
				3110	\|\| ( ( aExp == 0x7FF )
				3111	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				3112	) {
				3113	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3114	}
				3115	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3116	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3117	}
				3118	z = aSig<<shiftCount;
				3119	}
				3120	else {
				3121	if ( aExp < 0x3FE ) {
				3122	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3123	return 0;
				3124	}
				3125	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3126	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3127	STATUS(float_exception_flags) \|= float_flag_inexact;
				3128	}
				3129	}
				3130	if ( aSign ) z = - z;
				3131	return z;
				3132
				3133	}
				3134
				3135	/*----------------------------------------------------------------------------
				3136	\| Returns the result of converting the double-precision floating-point value
				3137	\| `a' to the single-precision floating-point format. The conversion is
				3138	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3139	\| Arithmetic.
				3140	----------------------------------------------------------------------------/
				3141
				3142	float32 float64_to_float32( float64 a STATUS_PARAM )
				3143	{
				3144	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3145	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3146	uint64_t aSig;
				3147	uint32_t zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3148	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3149
				3150	aSig = extractFloat64Frac( a );
				3151	aExp = extractFloat64Exp( a );
				3152	aSign = extractFloat64Sign( a );
				3153	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3154	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3155	return packFloat32( aSign, 0xFF, 0 );
				3156	}
				3157	shift64RightJamming( aSig, 22, &aSig );
				3158	zSig = aSig;
				3159	if ( aExp \|\| zSig ) {
				3160	zSig \|= 0x40000000;
				3161	aExp -= 0x381;
				3162	}
				3163	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				3164
				3165	}
				3166
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3167
				3168	/*----------------------------------------------------------------------------
				3169	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				3170	\| half-precision floating-point value, returning the result. After being
				3171	\| shifted into the proper positions, the three fields are simply added
				3172	\| together to form the result. This means that any integer portion of `zSig'
				3173	\| will be added into the exponent. Since a properly normalized significand
				3174	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				3175	\| than the desired result exponent whenever `zSig' is a complete, normalized
				3176	\| significand.
				3177	----------------------------------------------------------------------------/
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3178	static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3179	{
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3180	return make_float16(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3181	(((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3182	}
				3183
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3184	/*----------------------------------------------------------------------------
				3185	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				3186	\| and significand `zSig', and returns the proper half-precision floating-
				3187	\| point value corresponding to the abstract input. Ordinarily, the abstract
				3188	\| value is simply rounded and packed into the half-precision format, with
				3189	\| the inexact exception raised if the abstract input cannot be represented
				3190	\| exactly. However, if the abstract value is too large, the overflow and
				3191	\| inexact exceptions are raised and an infinity or maximal finite value is
				3192	\| returned. If the abstract value is too small, the input value is rounded to
				3193	\| a subnormal number, and the underflow and inexact exceptions are raised if
				3194	\| the abstract input cannot be represented exactly as a subnormal half-
				3195	\| precision floating-point number.
				3196	\| The `ieee' flag indicates whether to use IEEE standard half precision, or
				3197	\| ARM-style "alternative representation", which omits the NaN and Inf
				3198	\| encodings in order to raise the maximum representable exponent by one.
				3199	\| The input significand `zSig' has its binary point between bits 22
				3200	\| and 23, which is 13 bits to the left of the usual location. This shifted
				3201	\| significand must be normalized or smaller. If `zSig' is not normalized,
				3202	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				3203	\| and it must not require rounding. In the usual case that `zSig' is
				3204	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				3205	\| Note the slightly odd position of the binary point in zSig compared with the
				3206	\| other roundAndPackFloat functions. This should probably be fixed if we
				3207	\| need to implement more float16 routines than just conversion.
				3208	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				3209	\| Binary Floating-Point Arithmetic.
				3210	----------------------------------------------------------------------------/
				3211
				3212	static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp,
				3213	uint32_t zSig, flag ieee STATUS_PARAM)
				3214	{
				3215	int maxexp = ieee ? 29 : 30;
				3216	uint32_t mask;
				3217	uint32_t increment;
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3218	bool rounding_bumps_exp;
				3219	bool is_tiny = false;
				3220
				3221	/* Calculate the mask of bits of the mantissa which are not
				3222	* representable in half-precision and will be lost.
				3223	*/
				3224	if (zExp < 1) {
				3225	/* Will be denormal in halfprec */
				3226	mask = 0x00ffffff;
				3227	if (zExp >= -11) {
				3228	mask >>= 11 + zExp;
				3229	}
				3230	} else {
				3231	/* Normal number in halfprec */
				3232	mask = 0x00001fff;
				3233	}
				3234
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	3235	switch (STATUS(float_rounding_mode)) {
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3236	case float_round_nearest_even:
				3237	increment = (mask + 1) >> 1;
				3238	if ((zSig & mask) == increment) {
				3239	increment = zSig & (increment << 1);
				3240	}
				3241	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	3242	case float_round_ties_away:
				3243	increment = (mask + 1) >> 1;
				3244	break;
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3245	case float_round_up:
				3246	increment = zSign ? 0 : mask;
				3247	break;
				3248	case float_round_down:
				3249	increment = zSign ? mask : 0;
				3250	break;
				3251	default: /* round_to_zero */
				3252	increment = 0;
				3253	break;
				3254	}
				3255
				3256	rounding_bumps_exp = (zSig + increment >= 0x01000000);
				3257
				3258	if (zExp > maxexp \|\| (zExp == maxexp && rounding_bumps_exp)) {
				3259	if (ieee) {
				3260	float_raise(float_flag_overflow \| float_flag_inexact STATUS_VAR);
				3261	return packFloat16(zSign, 0x1f, 0);
				3262	} else {
				3263	float_raise(float_flag_invalid STATUS_VAR);
				3264	return packFloat16(zSign, 0x1f, 0x3ff);
				3265	}
				3266	}
				3267
				3268	if (zExp < 0) {
				3269	/* Note that flush-to-zero does not affect half-precision results */
				3270	is_tiny =
				3271	(STATUS(float_detect_tininess) == float_tininess_before_rounding)
				3272	\|\| (zExp < -1)
				3273	\|\| (!rounding_bumps_exp);
				3274	}
				3275	if (zSig & mask) {
				3276	float_raise(float_flag_inexact STATUS_VAR);
				3277	if (is_tiny) {
				3278	float_raise(float_flag_underflow STATUS_VAR);
				3279	}
				3280	}
				3281
				3282	zSig += increment;
				3283	if (rounding_bumps_exp) {
				3284	zSig >>= 1;
				3285	zExp++;
				3286	}
				3287
				3288	if (zExp < -10) {
				3289	return packFloat16(zSign, 0, 0);
				3290	}
				3291	if (zExp < 0) {
				3292	zSig >>= -zExp;
				3293	zExp = 0;
				3294	}
				3295	return packFloat16(zSign, zExp, zSig >> 13);
				3296	}
				3297
				3298	static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr,
				3299	uint32_t *zSigPtr)
				3300	{
				3301	int8_t shiftCount = countLeadingZeros32(aSig) - 21;
				3302	*zSigPtr = aSig << shiftCount;
				3303	*zExpPtr = 1 - shiftCount;
				3304	}
				3305
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3306	/* Half precision floats come in two formats: standard IEEE and "ARM" format.
				3307	The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3308
				3309	float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3310	{
				3311	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3312	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3313	uint32_t aSig;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3314
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3315	aSign = extractFloat16Sign(a);
				3316	aExp = extractFloat16Exp(a);
				3317	aSig = extractFloat16Frac(a);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3318
				3319	if (aExp == 0x1f && ieee) {
				3320	if (aSig) {
Peter Maydell	f591e1b	2011-02-10 11:28:59 +0000	[diff] [blame]	3321	return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3322	}
Peter Maydell	4be8eea	2012-09-24 17:28:35 +0100	[diff] [blame]	3323	return packFloat32(aSign, 0xff, 0);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3324	}
				3325	if (aExp == 0) {
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3326	if (aSig == 0) {
				3327	return packFloat32(aSign, 0, 0);
				3328	}
				3329
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3330	normalizeFloat16Subnormal(aSig, &aExp, &aSig);
				3331	aExp--;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3332	}
				3333	return packFloat32( aSign, aExp + 0x70, aSig << 13);
				3334	}
				3335
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3336	float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3337	{
				3338	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3339	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3340	uint32_t aSig;
Peter Maydell	38970ef	2014-01-06 11:47:21 +0000	[diff] [blame]	3341
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3342	a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3343
				3344	aSig = extractFloat32Frac( a );
				3345	aExp = extractFloat32Exp( a );
				3346	aSign = extractFloat32Sign( a );
				3347	if ( aExp == 0xFF ) {
				3348	if (aSig) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3349	/* Input is a NaN */
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3350	if (!ieee) {
Peter Maydell	38970ef	2014-01-06 11:47:21 +0000	[diff] [blame]	3351	float_raise(float_flag_invalid STATUS_VAR);
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3352	return packFloat16(aSign, 0, 0);
				3353	}
Peter Maydell	38970ef	2014-01-06 11:47:21 +0000	[diff] [blame]	3354	return commonNaNToFloat16(
				3355	float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3356	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3357	/* Infinity */
				3358	if (!ieee) {
				3359	float_raise(float_flag_invalid STATUS_VAR);
				3360	return packFloat16(aSign, 0x1f, 0x3ff);
				3361	}
				3362	return packFloat16(aSign, 0x1f, 0);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3363	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3364	if (aExp == 0 && aSig == 0) {
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3365	return packFloat16(aSign, 0, 0);
				3366	}
Peter Maydell	38970ef	2014-01-06 11:47:21 +0000	[diff] [blame]	3367	/* Decimal point between bits 22 and 23. Note that we add the 1 bit
				3368	* even if the input is denormal; however this is harmless because
				3369	* the largest possible single-precision denormal is still smaller
				3370	* than the smallest representable half-precision denormal, and so we
				3371	* will end up ignoring aSig and returning via the "always return zero"
				3372	* codepath.
				3373	*/
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3374	aSig \|= 0x00800000;
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3375	aExp -= 0x71;
Peter Maydell	38970ef	2014-01-06 11:47:21 +0000	[diff] [blame]	3376
Peter Maydell	c4a1c5e	2014-01-07 17:19:11 +0000	[diff] [blame]	3377	return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3378	}
				3379
Peter Maydell	14c9a07	2014-01-07 17:19:12 +0000	[diff] [blame]	3380	float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
				3381	{
				3382	flag aSign;
				3383	int_fast16_t aExp;
				3384	uint32_t aSig;
				3385
				3386	aSign = extractFloat16Sign(a);
				3387	aExp = extractFloat16Exp(a);
				3388	aSig = extractFloat16Frac(a);
				3389
				3390	if (aExp == 0x1f && ieee) {
				3391	if (aSig) {
				3392	return commonNaNToFloat64(
				3393	float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
				3394	}
				3395	return packFloat64(aSign, 0x7ff, 0);
				3396	}
				3397	if (aExp == 0) {
				3398	if (aSig == 0) {
				3399	return packFloat64(aSign, 0, 0);
				3400	}
				3401
				3402	normalizeFloat16Subnormal(aSig, &aExp, &aSig);
				3403	aExp--;
				3404	}
				3405	return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
				3406	}
				3407
				3408	float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
				3409	{
				3410	flag aSign;
				3411	int_fast16_t aExp;
				3412	uint64_t aSig;
				3413	uint32_t zSig;
				3414
				3415	a = float64_squash_input_denormal(a STATUS_VAR);
				3416
				3417	aSig = extractFloat64Frac(a);
				3418	aExp = extractFloat64Exp(a);
				3419	aSign = extractFloat64Sign(a);
				3420	if (aExp == 0x7FF) {
				3421	if (aSig) {
				3422	/* Input is a NaN */
				3423	if (!ieee) {
				3424	float_raise(float_flag_invalid STATUS_VAR);
				3425	return packFloat16(aSign, 0, 0);
				3426	}
				3427	return commonNaNToFloat16(
				3428	float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
				3429	}
				3430	/* Infinity */
				3431	if (!ieee) {
				3432	float_raise(float_flag_invalid STATUS_VAR);
				3433	return packFloat16(aSign, 0x1f, 0x3ff);
				3434	}
				3435	return packFloat16(aSign, 0x1f, 0);
				3436	}
				3437	shift64RightJamming(aSig, 29, &aSig);
				3438	zSig = aSig;
				3439	if (aExp == 0 && zSig == 0) {
				3440	return packFloat16(aSign, 0, 0);
				3441	}
				3442	/* Decimal point between bits 22 and 23. Note that we add the 1 bit
				3443	* even if the input is denormal; however this is harmless because
				3444	* the largest possible single-precision denormal is still smaller
				3445	* than the smallest representable half-precision denormal, and so we
				3446	* will end up ignoring aSig and returning via the "always return zero"
				3447	* codepath.
				3448	*/
				3449	zSig \|= 0x00800000;
				3450	aExp -= 0x3F1;
				3451
				3452	return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR);
				3453	}
				3454
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3455	/*----------------------------------------------------------------------------
				3456	\| Returns the result of converting the double-precision floating-point value
				3457	\| `a' to the extended double-precision floating-point format. The conversion
				3458	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				3459	\| Arithmetic.
				3460	----------------------------------------------------------------------------/
				3461
				3462	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				3463	{
				3464	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3465	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3466	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3467
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3468	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3469	aSig = extractFloat64Frac( a );
				3470	aExp = extractFloat64Exp( a );
				3471	aSign = extractFloat64Sign( a );
				3472	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3473	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3474	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3475	}
				3476	if ( aExp == 0 ) {
				3477	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				3478	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3479	}
				3480	return
				3481	packFloatx80(
				3482	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				3483
				3484	}
				3485
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3486	/*----------------------------------------------------------------------------
				3487	\| Returns the result of converting the double-precision floating-point value
				3488	\| `a' to the quadruple-precision floating-point format. The conversion is
				3489	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3490	\| Arithmetic.
				3491	----------------------------------------------------------------------------/
				3492
				3493	float128 float64_to_float128( float64 a STATUS_PARAM )
				3494	{
				3495	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3496	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3497	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3498
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3499	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3500	aSig = extractFloat64Frac( a );
				3501	aExp = extractFloat64Exp( a );
				3502	aSign = extractFloat64Sign( a );
				3503	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3504	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3505	return packFloat128( aSign, 0x7FFF, 0, 0 );
				3506	}
				3507	if ( aExp == 0 ) {
				3508	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				3509	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3510	--aExp;
				3511	}
				3512	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				3513	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				3514
				3515	}
				3516
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3517	/*----------------------------------------------------------------------------
				3518	\| Rounds the double-precision floating-point value `a' to an integer, and
				3519	\| returns the result as a double-precision floating-point value. The
				3520	\| operation is performed according to the IEC/IEEE Standard for Binary
				3521	\| Floating-Point Arithmetic.
				3522	----------------------------------------------------------------------------/
				3523
				3524	float64 float64_round_to_int( float64 a STATUS_PARAM )
				3525	{
				3526	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3527	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3528	uint64_t lastBitMask, roundBitsMask;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3529	uint64_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3530	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3531
				3532	aExp = extractFloat64Exp( a );
				3533	if ( 0x433 <= aExp ) {
				3534	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				3535	return propagateFloat64NaN( a, a STATUS_VAR );
				3536	}
				3537	return a;
				3538	}
				3539	if ( aExp < 0x3FF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3540	if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3541	STATUS(float_exception_flags) \|= float_flag_inexact;
				3542	aSign = extractFloat64Sign( a );
				3543	switch ( STATUS(float_rounding_mode) ) {
				3544	case float_round_nearest_even:
				3545	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				3546	return packFloat64( aSign, 0x3FF, 0 );
				3547	}
				3548	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	3549	case float_round_ties_away:
				3550	if (aExp == 0x3FE) {
				3551	return packFloat64(aSign, 0x3ff, 0);
				3552	}
				3553	break;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3554	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3555	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3556	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3557	return make_float64(
				3558	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3559	}
				3560	return packFloat64( aSign, 0, 0 );
				3561	}
				3562	lastBitMask = 1;
				3563	lastBitMask <<= 0x433 - aExp;
				3564	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3565	z = float64_val(a);
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	3566	switch (STATUS(float_rounding_mode)) {
				3567	case float_round_nearest_even:
				3568	z += lastBitMask >> 1;
				3569	if ((z & roundBitsMask) == 0) {
				3570	z &= ~lastBitMask;
				3571	}
				3572	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	3573	case float_round_ties_away:
				3574	z += lastBitMask >> 1;
				3575	break;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	3576	case float_round_to_zero:
				3577	break;
				3578	case float_round_up:
				3579	if (!extractFloat64Sign(make_float64(z))) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3580	z += roundBitsMask;
				3581	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	3582	break;
				3583	case float_round_down:
				3584	if (extractFloat64Sign(make_float64(z))) {
				3585	z += roundBitsMask;
				3586	}
				3587	break;
				3588	default:
				3589	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3590	}
				3591	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3592	if ( z != float64_val(a) )
				3593	STATUS(float_exception_flags) \|= float_flag_inexact;
				3594	return make_float64(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3595
				3596	}
				3597
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	3598	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				3599	{
				3600	int oldmode;
				3601	float64 res;
				3602	oldmode = STATUS(float_rounding_mode);
				3603	STATUS(float_rounding_mode) = float_round_to_zero;
				3604	res = float64_round_to_int(a STATUS_VAR);
				3605	STATUS(float_rounding_mode) = oldmode;
				3606	return res;
				3607	}
				3608
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3609	/*----------------------------------------------------------------------------
				3610	\| Returns the result of adding the absolute values of the double-precision
				3611	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				3612	\| before being returned. `zSign' is ignored if the result is a NaN.
				3613	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3614	\| Floating-Point Arithmetic.
				3615	----------------------------------------------------------------------------/
				3616
				3617	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3618	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3619	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3620	uint64_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3621	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3622
				3623	aSig = extractFloat64Frac( a );
				3624	aExp = extractFloat64Exp( a );
				3625	bSig = extractFloat64Frac( b );
				3626	bExp = extractFloat64Exp( b );
				3627	expDiff = aExp - bExp;
				3628	aSig <<= 9;
				3629	bSig <<= 9;
				3630	if ( 0 < expDiff ) {
				3631	if ( aExp == 0x7FF ) {
				3632	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3633	return a;
				3634	}
				3635	if ( bExp == 0 ) {
				3636	--expDiff;
				3637	}
				3638	else {
				3639	bSig \|= LIT64( 0x2000000000000000 );
				3640	}
				3641	shift64RightJamming( bSig, expDiff, &bSig );
				3642	zExp = aExp;
				3643	}
				3644	else if ( expDiff < 0 ) {
				3645	if ( bExp == 0x7FF ) {
				3646	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3647	return packFloat64( zSign, 0x7FF, 0 );
				3648	}
				3649	if ( aExp == 0 ) {
				3650	++expDiff;
				3651	}
				3652	else {
				3653	aSig \|= LIT64( 0x2000000000000000 );
				3654	}
				3655	shift64RightJamming( aSig, - expDiff, &aSig );
				3656	zExp = bExp;
				3657	}
				3658	else {
				3659	if ( aExp == 0x7FF ) {
				3660	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3661	return a;
				3662	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3663	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	3664	if (STATUS(flush_to_zero)) {
				3665	if (aSig \| bSig) {
				3666	float_raise(float_flag_output_denormal STATUS_VAR);
				3667	}
				3668	return packFloat64(zSign, 0, 0);
				3669	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3670	return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				3671	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3672	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				3673	zExp = aExp;
				3674	goto roundAndPack;
				3675	}
				3676	aSig \|= LIT64( 0x2000000000000000 );
				3677	zSig = ( aSig + bSig )<<1;
				3678	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3679	if ( (int64_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3680	zSig = aSig + bSig;
				3681	++zExp;
				3682	}
				3683	roundAndPack:
				3684	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3685
				3686	}
				3687
				3688	/*----------------------------------------------------------------------------
				3689	\| Returns the result of subtracting the absolute values of the double-
				3690	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				3691	\| difference is negated before being returned. `zSign' is ignored if the
				3692	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3693	\| Standard for Binary Floating-Point Arithmetic.
				3694	----------------------------------------------------------------------------/
				3695
				3696	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3697	{
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3698	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3699	uint64_t aSig, bSig, zSig;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3700	int_fast16_t expDiff;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3701
				3702	aSig = extractFloat64Frac( a );
				3703	aExp = extractFloat64Exp( a );
				3704	bSig = extractFloat64Frac( b );
				3705	bExp = extractFloat64Exp( b );
				3706	expDiff = aExp - bExp;
				3707	aSig <<= 10;
				3708	bSig <<= 10;
				3709	if ( 0 < expDiff ) goto aExpBigger;
				3710	if ( expDiff < 0 ) goto bExpBigger;
				3711	if ( aExp == 0x7FF ) {
				3712	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3713	float_raise( float_flag_invalid STATUS_VAR);
				3714	return float64_default_nan;
				3715	}
				3716	if ( aExp == 0 ) {
				3717	aExp = 1;
				3718	bExp = 1;
				3719	}
				3720	if ( bSig < aSig ) goto aBigger;
				3721	if ( aSig < bSig ) goto bBigger;
				3722	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3723	bExpBigger:
				3724	if ( bExp == 0x7FF ) {
				3725	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3726	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				3727	}
				3728	if ( aExp == 0 ) {
				3729	++expDiff;
				3730	}
				3731	else {
				3732	aSig \|= LIT64( 0x4000000000000000 );
				3733	}
				3734	shift64RightJamming( aSig, - expDiff, &aSig );
				3735	bSig \|= LIT64( 0x4000000000000000 );
				3736	bBigger:
				3737	zSig = bSig - aSig;
				3738	zExp = bExp;
				3739	zSign ^= 1;
				3740	goto normalizeRoundAndPack;
				3741	aExpBigger:
				3742	if ( aExp == 0x7FF ) {
				3743	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3744	return a;
				3745	}
				3746	if ( bExp == 0 ) {
				3747	--expDiff;
				3748	}
				3749	else {
				3750	bSig \|= LIT64( 0x4000000000000000 );
				3751	}
				3752	shift64RightJamming( bSig, expDiff, &bSig );
				3753	aSig \|= LIT64( 0x4000000000000000 );
				3754	aBigger:
				3755	zSig = aSig - bSig;
				3756	zExp = aExp;
				3757	normalizeRoundAndPack:
				3758	--zExp;
				3759	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3760
				3761	}
				3762
				3763	/*----------------------------------------------------------------------------
				3764	\| Returns the result of adding the double-precision floating-point values `a'
				3765	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				3766	\| Binary Floating-Point Arithmetic.
				3767	----------------------------------------------------------------------------/
				3768
				3769	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				3770	{
				3771	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3772	a = float64_squash_input_denormal(a STATUS_VAR);
				3773	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3774
				3775	aSign = extractFloat64Sign( a );
				3776	bSign = extractFloat64Sign( b );
				3777	if ( aSign == bSign ) {
				3778	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3779	}
				3780	else {
				3781	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3782	}
				3783
				3784	}
				3785
				3786	/*----------------------------------------------------------------------------
				3787	\| Returns the result of subtracting the double-precision floating-point values
				3788	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3789	\| for Binary Floating-Point Arithmetic.
				3790	----------------------------------------------------------------------------/
				3791
				3792	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				3793	{
				3794	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3795	a = float64_squash_input_denormal(a STATUS_VAR);
				3796	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3797
				3798	aSign = extractFloat64Sign( a );
				3799	bSign = extractFloat64Sign( b );
				3800	if ( aSign == bSign ) {
				3801	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3802	}
				3803	else {
				3804	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3805	}
				3806
				3807	}
				3808
				3809	/*----------------------------------------------------------------------------
				3810	\| Returns the result of multiplying the double-precision floating-point values
				3811	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3812	\| for Binary Floating-Point Arithmetic.
				3813	----------------------------------------------------------------------------/
				3814
				3815	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				3816	{
				3817	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3818	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3819	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3820
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3821	a = float64_squash_input_denormal(a STATUS_VAR);
				3822	b = float64_squash_input_denormal(b STATUS_VAR);
				3823
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3824	aSig = extractFloat64Frac( a );
				3825	aExp = extractFloat64Exp( a );
				3826	aSign = extractFloat64Sign( a );
				3827	bSig = extractFloat64Frac( b );
				3828	bExp = extractFloat64Exp( b );
				3829	bSign = extractFloat64Sign( b );
				3830	zSign = aSign ^ bSign;
				3831	if ( aExp == 0x7FF ) {
				3832	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3833	return propagateFloat64NaN( a, b STATUS_VAR );
				3834	}
				3835	if ( ( bExp \| bSig ) == 0 ) {
				3836	float_raise( float_flag_invalid STATUS_VAR);
				3837	return float64_default_nan;
				3838	}
				3839	return packFloat64( zSign, 0x7FF, 0 );
				3840	}
				3841	if ( bExp == 0x7FF ) {
				3842	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3843	if ( ( aExp \| aSig ) == 0 ) {
				3844	float_raise( float_flag_invalid STATUS_VAR);
				3845	return float64_default_nan;
				3846	}
				3847	return packFloat64( zSign, 0x7FF, 0 );
				3848	}
				3849	if ( aExp == 0 ) {
				3850	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3851	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3852	}
				3853	if ( bExp == 0 ) {
				3854	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				3855	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3856	}
				3857	zExp = aExp + bExp - 0x3FF;
				3858	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3859	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3860	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3861	zSig0 \|= ( zSig1 != 0 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3862	if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3863	zSig0 <<= 1;
				3864	--zExp;
				3865	}
				3866	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				3867
				3868	}
				3869
				3870	/*----------------------------------------------------------------------------
				3871	\| Returns the result of dividing the double-precision floating-point value `a'
				3872	\| by the corresponding value `b'. The operation is performed according to
				3873	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3874	----------------------------------------------------------------------------/
				3875
				3876	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				3877	{
				3878	flag aSign, bSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3879	int_fast16_t aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3880	uint64_t aSig, bSig, zSig;
				3881	uint64_t rem0, rem1;
				3882	uint64_t term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3883	a = float64_squash_input_denormal(a STATUS_VAR);
				3884	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3885
				3886	aSig = extractFloat64Frac( a );
				3887	aExp = extractFloat64Exp( a );
				3888	aSign = extractFloat64Sign( a );
				3889	bSig = extractFloat64Frac( b );
				3890	bExp = extractFloat64Exp( b );
				3891	bSign = extractFloat64Sign( b );
				3892	zSign = aSign ^ bSign;
				3893	if ( aExp == 0x7FF ) {
				3894	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3895	if ( bExp == 0x7FF ) {
				3896	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3897	float_raise( float_flag_invalid STATUS_VAR);
				3898	return float64_default_nan;
				3899	}
				3900	return packFloat64( zSign, 0x7FF, 0 );
				3901	}
				3902	if ( bExp == 0x7FF ) {
				3903	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3904	return packFloat64( zSign, 0, 0 );
				3905	}
				3906	if ( bExp == 0 ) {
				3907	if ( bSig == 0 ) {
				3908	if ( ( aExp \| aSig ) == 0 ) {
				3909	float_raise( float_flag_invalid STATUS_VAR);
				3910	return float64_default_nan;
				3911	}
				3912	float_raise( float_flag_divbyzero STATUS_VAR);
				3913	return packFloat64( zSign, 0x7FF, 0 );
				3914	}
				3915	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3916	}
				3917	if ( aExp == 0 ) {
				3918	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3919	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3920	}
				3921	zExp = aExp - bExp + 0x3FD;
				3922	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3923	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3924	if ( bSig <= ( aSig + aSig ) ) {
				3925	aSig >>= 1;
				3926	++zExp;
				3927	}
				3928	zSig = estimateDiv128To64( aSig, 0, bSig );
				3929	if ( ( zSig & 0x1FF ) <= 2 ) {
				3930	mul64To128( bSig, zSig, &term0, &term1 );
				3931	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3932	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3933	--zSig;
				3934	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3935	}
				3936	zSig \|= ( rem1 != 0 );
				3937	}
				3938	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3939
				3940	}
				3941
				3942	/*----------------------------------------------------------------------------
				3943	\| Returns the remainder of the double-precision floating-point value `a'
				3944	\| with respect to the corresponding value `b'. The operation is performed
				3945	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3946	----------------------------------------------------------------------------/
				3947
				3948	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				3949	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	3950	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	3951	int_fast16_t aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3952	uint64_t aSig, bSig;
				3953	uint64_t q, alternateASig;
				3954	int64_t sigMean;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3955
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3956	a = float64_squash_input_denormal(a STATUS_VAR);
				3957	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3958	aSig = extractFloat64Frac( a );
				3959	aExp = extractFloat64Exp( a );
				3960	aSign = extractFloat64Sign( a );
				3961	bSig = extractFloat64Frac( b );
				3962	bExp = extractFloat64Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3963	if ( aExp == 0x7FF ) {
				3964	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3965	return propagateFloat64NaN( a, b STATUS_VAR );
				3966	}
				3967	float_raise( float_flag_invalid STATUS_VAR);
				3968	return float64_default_nan;
				3969	}
				3970	if ( bExp == 0x7FF ) {
				3971	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3972	return a;
				3973	}
				3974	if ( bExp == 0 ) {
				3975	if ( bSig == 0 ) {
				3976	float_raise( float_flag_invalid STATUS_VAR);
				3977	return float64_default_nan;
				3978	}
				3979	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3980	}
				3981	if ( aExp == 0 ) {
				3982	if ( aSig == 0 ) return a;
				3983	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3984	}
				3985	expDiff = aExp - bExp;
				3986	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				3987	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3988	if ( expDiff < 0 ) {
				3989	if ( expDiff < -1 ) return a;
				3990	aSig >>= 1;
				3991	}
				3992	q = ( bSig <= aSig );
				3993	if ( q ) aSig -= bSig;
				3994	expDiff -= 64;
				3995	while ( 0 < expDiff ) {
				3996	q = estimateDiv128To64( aSig, 0, bSig );
				3997	q = ( 2 < q ) ? q - 2 : 0;
				3998	aSig = - ( ( bSig>>2 ) * q );
				3999	expDiff -= 62;
				4000	}
				4001	expDiff += 64;
				4002	if ( 0 < expDiff ) {
				4003	q = estimateDiv128To64( aSig, 0, bSig );
				4004	q = ( 2 < q ) ? q - 2 : 0;
				4005	q >>= 64 - expDiff;
				4006	bSig >>= 2;
				4007	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				4008	}
				4009	else {
				4010	aSig >>= 2;
				4011	bSig >>= 2;
				4012	}
				4013	do {
				4014	alternateASig = aSig;
				4015	++q;
				4016	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4017	} while ( 0 <= (int64_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4018	sigMean = aSig + alternateASig;
				4019	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				4020	aSig = alternateASig;
				4021	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4022	zSign = ( (int64_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4023	if ( zSign ) aSig = - aSig;
				4024	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				4025
				4026	}
				4027
				4028	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4029	\| Returns the result of multiplying the double-precision floating-point values
				4030	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				4031	\| multiplication. The operation is performed according to the IEC/IEEE
				4032	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				4033	\| The flags argument allows the caller to select negation of the
				4034	\| addend, the intermediate product, or the final result. (The difference
				4035	\| between this and having the caller do a separate negation is that negating
				4036	\| externally will flip the sign bit on NaNs.)
				4037	----------------------------------------------------------------------------/
				4038
				4039	float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
				4040	{
				4041	flag aSign, bSign, cSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	4042	int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4043	uint64_t aSig, bSig, cSig;
				4044	flag pInf, pZero, pSign;
				4045	uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
				4046	int shiftcount;
				4047	flag signflip, infzero;
				4048
				4049	a = float64_squash_input_denormal(a STATUS_VAR);
				4050	b = float64_squash_input_denormal(b STATUS_VAR);
				4051	c = float64_squash_input_denormal(c STATUS_VAR);
				4052	aSig = extractFloat64Frac(a);
				4053	aExp = extractFloat64Exp(a);
				4054	aSign = extractFloat64Sign(a);
				4055	bSig = extractFloat64Frac(b);
				4056	bExp = extractFloat64Exp(b);
				4057	bSign = extractFloat64Sign(b);
				4058	cSig = extractFloat64Frac(c);
				4059	cExp = extractFloat64Exp(c);
				4060	cSign = extractFloat64Sign(c);
				4061
				4062	infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) \|\|
				4063	(aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
				4064
				4065	/* It is implementation-defined whether the cases of (0,inf,qnan)
				4066	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				4067	* they return if they do), so we have to hand this information
				4068	* off to the target-specific pick-a-NaN routine.
				4069	*/
				4070	if (((aExp == 0x7ff) && aSig) \|\|
				4071	((bExp == 0x7ff) && bSig) \|\|
				4072	((cExp == 0x7ff) && cSig)) {
				4073	return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
				4074	}
				4075
				4076	if (infzero) {
				4077	float_raise(float_flag_invalid STATUS_VAR);
				4078	return float64_default_nan;
				4079	}
				4080
				4081	if (flags & float_muladd_negate_c) {
				4082	cSign ^= 1;
				4083	}
				4084
				4085	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				4086
				4087	/* Work out the sign and type of the product */
				4088	pSign = aSign ^ bSign;
				4089	if (flags & float_muladd_negate_product) {
				4090	pSign ^= 1;
				4091	}
				4092	pInf = (aExp == 0x7ff) \|\| (bExp == 0x7ff);
				4093	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				4094
				4095	if (cExp == 0x7ff) {
				4096	if (pInf && (pSign ^ cSign)) {
				4097	/* addition of opposite-signed infinities => InvalidOperation */
				4098	float_raise(float_flag_invalid STATUS_VAR);
				4099	return float64_default_nan;
				4100	}
				4101	/* Otherwise generate an infinity of the same sign */
				4102	return packFloat64(cSign ^ signflip, 0x7ff, 0);
				4103	}
				4104
				4105	if (pInf) {
				4106	return packFloat64(pSign ^ signflip, 0x7ff, 0);
				4107	}
				4108
				4109	if (pZero) {
				4110	if (cExp == 0) {
				4111	if (cSig == 0) {
				4112	/* Adding two exact zeroes */
				4113	if (pSign == cSign) {
				4114	zSign = pSign;
				4115	} else if (STATUS(float_rounding_mode) == float_round_down) {
				4116	zSign = 1;
				4117	} else {
				4118	zSign = 0;
				4119	}
				4120	return packFloat64(zSign ^ signflip, 0, 0);
				4121	}
				4122	/* Exact zero plus a denorm */
				4123	if (STATUS(flush_to_zero)) {
				4124	float_raise(float_flag_output_denormal STATUS_VAR);
				4125	return packFloat64(cSign ^ signflip, 0, 0);
				4126	}
				4127	}
				4128	/* Zero plus something non-zero : just return the something */
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	4129	if (flags & float_muladd_halve_result) {
				4130	if (cExp == 0) {
				4131	normalizeFloat64Subnormal(cSig, &cExp, &cSig);
				4132	}
				4133	/* Subtract one to halve, and one again because roundAndPackFloat64
				4134	* wants one less than the true exponent.
				4135	*/
				4136	cExp -= 2;
				4137	cSig = (cSig \| 0x0010000000000000ULL) << 10;
				4138	return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR);
				4139	}
Richard Sandiford	a6e7c18	2013-01-22 17:03:05 +0000	[diff] [blame]	4140	return packFloat64(cSign ^ signflip, cExp, cSig);
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4141	}
				4142
				4143	if (aExp == 0) {
				4144	normalizeFloat64Subnormal(aSig, &aExp, &aSig);
				4145	}
				4146	if (bExp == 0) {
				4147	normalizeFloat64Subnormal(bSig, &bExp, &bSig);
				4148	}
				4149
				4150	/* Calculate the actual result a * b + c */
				4151
				4152	/* Multiply first; this is easy. */
				4153	/* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
				4154	* because we want the true exponent, not the "one-less-than"
				4155	* flavour that roundAndPackFloat64() takes.
				4156	*/
				4157	pExp = aExp + bExp - 0x3fe;
				4158	aSig = (aSig \| LIT64(0x0010000000000000))<<10;
				4159	bSig = (bSig \| LIT64(0x0010000000000000))<<11;
				4160	mul64To128(aSig, bSig, &pSig0, &pSig1);
				4161	if ((int64_t)(pSig0 << 1) >= 0) {
				4162	shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
				4163	pExp--;
				4164	}
				4165
				4166	zSign = pSign ^ signflip;
				4167
				4168	/* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
				4169	* bit in position 126.
				4170	*/
				4171	if (cExp == 0) {
				4172	if (!cSig) {
				4173	/* Throw out the special case of c being an exact zero now */
				4174	shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	4175	if (flags & float_muladd_halve_result) {
				4176	pExp--;
				4177	}
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4178	return roundAndPackFloat64(zSign, pExp - 1,
				4179	pSig1 STATUS_VAR);
				4180	}
				4181	normalizeFloat64Subnormal(cSig, &cExp, &cSig);
				4182	}
				4183
				4184	/* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
				4185	* significand of the addend, with the explicit bit in position 126.
				4186	*/
				4187	cSig0 = cSig << (126 - 64 - 52);
				4188	cSig1 = 0;
				4189	cSig0 \|= LIT64(0x4000000000000000);
				4190	expDiff = pExp - cExp;
				4191
				4192	if (pSign == cSign) {
				4193	/* Addition */
				4194	if (expDiff > 0) {
				4195	/* scale c to match p */
				4196	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				4197	zExp = pExp;
				4198	} else if (expDiff < 0) {
				4199	/* scale p to match c */
				4200	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				4201	zExp = cExp;
				4202	} else {
				4203	/* no scaling needed */
				4204	zExp = cExp;
				4205	}
				4206	/* Add significands and make sure explicit bit ends up in posn 126 */
				4207	add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				4208	if ((int64_t)zSig0 < 0) {
				4209	shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
				4210	} else {
				4211	zExp--;
				4212	}
				4213	shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	4214	if (flags & float_muladd_halve_result) {
				4215	zExp--;
				4216	}
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4217	return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
				4218	} else {
				4219	/* Subtraction */
				4220	if (expDiff > 0) {
				4221	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				4222	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				4223	zExp = pExp;
				4224	} else if (expDiff < 0) {
				4225	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				4226	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				4227	zExp = cExp;
				4228	zSign ^= 1;
				4229	} else {
				4230	zExp = pExp;
				4231	if (lt128(cSig0, cSig1, pSig0, pSig1)) {
				4232	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				4233	} else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
				4234	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				4235	zSign ^= 1;
				4236	} else {
				4237	/* Exact zero */
				4238	zSign = signflip;
				4239	if (STATUS(float_rounding_mode) == float_round_down) {
				4240	zSign ^= 1;
				4241	}
				4242	return packFloat64(zSign, 0, 0);
				4243	}
				4244	}
				4245	--zExp;
				4246	/* Do the equivalent of normalizeRoundAndPackFloat64() but
				4247	* starting with the significand in a pair of uint64_t.
				4248	*/
				4249	if (zSig0) {
				4250	shiftcount = countLeadingZeros64(zSig0) - 1;
				4251	shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
				4252	if (zSig1) {
				4253	zSig0 \|= 1;
				4254	}
				4255	zExp -= shiftcount;
				4256	} else {
Peter Maydell	e3d142d	2013-04-12 16:37:52 +0100	[diff] [blame]	4257	shiftcount = countLeadingZeros64(zSig1);
				4258	if (shiftcount == 0) {
				4259	zSig0 = (zSig1 >> 1) \| (zSig1 & 1);
				4260	zExp -= 63;
				4261	} else {
				4262	shiftcount--;
				4263	zSig0 = zSig1 << shiftcount;
				4264	zExp -= (shiftcount + 64);
				4265	}
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4266	}
Peter Maydell	67d4353	2014-02-20 10:35:50 +0000	[diff] [blame]	4267	if (flags & float_muladd_halve_result) {
				4268	zExp--;
				4269	}
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	4270	return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
				4271	}
				4272	}
				4273
				4274	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4275	\| Returns the square root of the double-precision floating-point value `a'.
				4276	\| The operation is performed according to the IEC/IEEE Standard for Binary
				4277	\| Floating-Point Arithmetic.
				4278	----------------------------------------------------------------------------/
				4279
				4280	float64 float64_sqrt( float64 a STATUS_PARAM )
				4281	{
				4282	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	4283	int_fast16_t aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4284	uint64_t aSig, zSig, doubleZSig;
				4285	uint64_t rem0, rem1, term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4286	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4287
				4288	aSig = extractFloat64Frac( a );
				4289	aExp = extractFloat64Exp( a );
				4290	aSign = extractFloat64Sign( a );
				4291	if ( aExp == 0x7FF ) {
				4292	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				4293	if ( ! aSign ) return a;
				4294	float_raise( float_flag_invalid STATUS_VAR);
				4295	return float64_default_nan;
				4296	}
				4297	if ( aSign ) {
				4298	if ( ( aExp \| aSig ) == 0 ) return a;
				4299	float_raise( float_flag_invalid STATUS_VAR);
				4300	return float64_default_nan;
				4301	}
				4302	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4303	if ( aSig == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4304	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				4305	}
				4306	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				4307	aSig \|= LIT64( 0x0010000000000000 );
				4308	zSig = estimateSqrt32( aExp, aSig>>21 );
				4309	aSig <<= 9 - ( aExp & 1 );
				4310	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				4311	if ( ( zSig & 0x1FF ) <= 5 ) {
				4312	doubleZSig = zSig<<1;
				4313	mul64To128( zSig, zSig, &term0, &term1 );
				4314	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4315	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4316	--zSig;
				4317	doubleZSig -= 2;
				4318	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				4319	}
				4320	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				4321	}
				4322	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				4323
				4324	}
				4325
				4326	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	4327	\| Returns the binary log of the double-precision floating-point value `a'.
				4328	\| The operation is performed according to the IEC/IEEE Standard for Binary
				4329	\| Floating-Point Arithmetic.
				4330	----------------------------------------------------------------------------/
				4331	float64 float64_log2( float64 a STATUS_PARAM )
				4332	{
				4333	flag aSign, zSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	4334	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4335	uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4336	a = float64_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	4337
				4338	aSig = extractFloat64Frac( a );
				4339	aExp = extractFloat64Exp( a );
				4340	aSign = extractFloat64Sign( a );
				4341
				4342	if ( aExp == 0 ) {
				4343	if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
				4344	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				4345	}
				4346	if ( aSign ) {
				4347	float_raise( float_flag_invalid STATUS_VAR);
				4348	return float64_default_nan;
				4349	}
				4350	if ( aExp == 0x7FF ) {
				4351	if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
				4352	return a;
				4353	}
				4354
				4355	aExp -= 0x3FF;
				4356	aSig \|= LIT64( 0x0010000000000000 );
				4357	zSign = aExp < 0;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4358	zSig = (uint64_t)aExp << 52;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	4359	for (i = 1LL << 51; i > 0; i >>= 1) {
				4360	mul64To128( aSig, aSig, &aSig0, &aSig1 );
				4361	aSig = ( aSig0 << 12 ) \| ( aSig1 >> 52 );
				4362	if ( aSig & LIT64( 0x0020000000000000 ) ) {
				4363	aSig >>= 1;
				4364	zSig \|= i;
				4365	}
				4366	}
				4367
				4368	if ( zSign )
				4369	zSig = -zSig;
				4370	return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
				4371	}
				4372
				4373	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4374	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4375	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				4376	\| if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4377	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4378	----------------------------------------------------------------------------/
				4379
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4380	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4381	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4382	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4383	a = float64_squash_input_denormal(a STATUS_VAR);
				4384	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4385
				4386	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4387	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4388	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4389	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4390	return 0;
				4391	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4392	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4393	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4394	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4395
				4396	}
				4397
				4398	/*----------------------------------------------------------------------------
				4399	\| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4400	\| equal to the corresponding value `b', and 0 otherwise. The invalid
				4401	\| exception is raised if either operand is a NaN. The comparison is performed
				4402	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4403	----------------------------------------------------------------------------/
				4404
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4405	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4406	{
				4407	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4408	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4409	a = float64_squash_input_denormal(a STATUS_VAR);
				4410	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4411
				4412	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4413	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4414	) {
				4415	float_raise( float_flag_invalid STATUS_VAR);
				4416	return 0;
				4417	}
				4418	aSign = extractFloat64Sign( a );
				4419	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4420	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4421	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4422	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4423	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4424
				4425	}
				4426
				4427	/*----------------------------------------------------------------------------
				4428	\| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4429	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				4430	\| raised if either operand is a NaN. The comparison is performed according
				4431	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4432	----------------------------------------------------------------------------/
				4433
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4434	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4435	{
				4436	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4437	uint64_t av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4438
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4439	a = float64_squash_input_denormal(a STATUS_VAR);
				4440	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4441	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4442	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4443	) {
				4444	float_raise( float_flag_invalid STATUS_VAR);
				4445	return 0;
				4446	}
				4447	aSign = extractFloat64Sign( a );
				4448	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4449	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4450	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4451	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4452	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4453
				4454	}
				4455
				4456	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4457	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4458	\| be compared, and 0 otherwise. The invalid exception is raised if either
				4459	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				4460	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4461	----------------------------------------------------------------------------/
				4462
				4463	int float64_unordered( float64 a, float64 b STATUS_PARAM )
				4464	{
				4465	a = float64_squash_input_denormal(a STATUS_VAR);
				4466	b = float64_squash_input_denormal(b STATUS_VAR);
				4467
				4468	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4469	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4470	) {
				4471	float_raise( float_flag_invalid STATUS_VAR);
				4472	return 1;
				4473	}
				4474	return 0;
				4475	}
				4476
				4477	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4478	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4479	\| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4480	\| exception.The comparison is performed according to the IEC/IEEE Standard
				4481	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4482	----------------------------------------------------------------------------/
				4483
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4484	int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4485	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4486	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4487	a = float64_squash_input_denormal(a STATUS_VAR);
				4488	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4489
				4490	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4491	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4492	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4493	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4494	float_raise( float_flag_invalid STATUS_VAR);
				4495	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4496	return 0;
				4497	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4498	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4499	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4500	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4501
				4502	}
				4503
				4504	/*----------------------------------------------------------------------------
				4505	\| Returns 1 if the double-precision floating-point value `a' is less than or
				4506	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				4507	\| cause an exception. Otherwise, the comparison is performed according to the
				4508	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4509	----------------------------------------------------------------------------/
				4510
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4511	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4512	{
				4513	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4514	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4515	a = float64_squash_input_denormal(a STATUS_VAR);
				4516	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4517
				4518	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4519	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4520	) {
				4521	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4522	float_raise( float_flag_invalid STATUS_VAR);
				4523	}
				4524	return 0;
				4525	}
				4526	aSign = extractFloat64Sign( a );
				4527	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4528	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4529	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4530	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4531	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4532
				4533	}
				4534
				4535	/*----------------------------------------------------------------------------
				4536	\| Returns 1 if the double-precision floating-point value `a' is less than
				4537	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4538	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				4539	\| Standard for Binary Floating-Point Arithmetic.
				4540	----------------------------------------------------------------------------/
				4541
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4542	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4543	{
				4544	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4545	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4546	a = float64_squash_input_denormal(a STATUS_VAR);
				4547	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4548
				4549	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4550	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4551	) {
				4552	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4553	float_raise( float_flag_invalid STATUS_VAR);
				4554	}
				4555	return 0;
				4556	}
				4557	aSign = extractFloat64Sign( a );
				4558	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4559	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4560	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4561	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4562	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4563
				4564	}
				4565
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4566	/*----------------------------------------------------------------------------
				4567	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
				4568	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				4569	\| comparison is performed according to the IEC/IEEE Standard for Binary
				4570	\| Floating-Point Arithmetic.
				4571	----------------------------------------------------------------------------/
				4572
				4573	int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
				4574	{
				4575	a = float64_squash_input_denormal(a STATUS_VAR);
				4576	b = float64_squash_input_denormal(b STATUS_VAR);
				4577
				4578	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4579	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4580	) {
				4581	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4582	float_raise( float_flag_invalid STATUS_VAR);
				4583	}
				4584	return 1;
				4585	}
				4586	return 0;
				4587	}
				4588
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4589	/*----------------------------------------------------------------------------
				4590	\| Returns the result of converting the extended double-precision floating-
				4591	\| point value `a' to the 32-bit two's complement integer format. The
				4592	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4593	\| Floating-Point Arithmetic---which means in particular that the conversion
				4594	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				4595	\| largest positive integer is returned. Otherwise, if the conversion
				4596	\| overflows, the largest integer with the same sign as `a' is returned.
				4597	----------------------------------------------------------------------------/
				4598
				4599	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				4600	{
				4601	flag aSign;
				4602	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4603	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4604
				4605	aSig = extractFloatx80Frac( a );
				4606	aExp = extractFloatx80Exp( a );
				4607	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4608	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4609	shiftCount = 0x4037 - aExp;
				4610	if ( shiftCount <= 0 ) shiftCount = 1;
				4611	shift64RightJamming( aSig, shiftCount, &aSig );
				4612	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				4613
				4614	}
				4615
				4616	/*----------------------------------------------------------------------------
				4617	\| Returns the result of converting the extended double-precision floating-
				4618	\| point value `a' to the 32-bit two's complement integer format. The
				4619	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4620	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4621	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4622	\| Otherwise, if the conversion overflows, the largest integer with the same
				4623	\| sign as `a' is returned.
				4624	----------------------------------------------------------------------------/
				4625
				4626	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				4627	{
				4628	flag aSign;
				4629	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4630	uint64_t aSig, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	4631	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4632
				4633	aSig = extractFloatx80Frac( a );
				4634	aExp = extractFloatx80Exp( a );
				4635	aSign = extractFloatx80Sign( a );
				4636	if ( 0x401E < aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4637	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4638	goto invalid;
				4639	}
				4640	else if ( aExp < 0x3FFF ) {
				4641	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4642	return 0;
				4643	}
				4644	shiftCount = 0x403E - aExp;
				4645	savedASig = aSig;
				4646	aSig >>= shiftCount;
				4647	z = aSig;
				4648	if ( aSign ) z = - z;
				4649	if ( ( z < 0 ) ^ aSign ) {
				4650	invalid:
				4651	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4652	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4653	}
				4654	if ( ( aSig<<shiftCount ) != savedASig ) {
				4655	STATUS(float_exception_flags) \|= float_flag_inexact;
				4656	}
				4657	return z;
				4658
				4659	}
				4660
				4661	/*----------------------------------------------------------------------------
				4662	\| Returns the result of converting the extended double-precision floating-
				4663	\| point value `a' to the 64-bit two's complement integer format. The
				4664	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4665	\| Floating-Point Arithmetic---which means in particular that the conversion
				4666	\| is rounded according to the current rounding mode. If `a' is a NaN,
				4667	\| the largest positive integer is returned. Otherwise, if the conversion
				4668	\| overflows, the largest integer with the same sign as `a' is returned.
				4669	----------------------------------------------------------------------------/
				4670
				4671	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				4672	{
				4673	flag aSign;
				4674	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4675	uint64_t aSig, aSigExtra;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4676
				4677	aSig = extractFloatx80Frac( a );
				4678	aExp = extractFloatx80Exp( a );
				4679	aSign = extractFloatx80Sign( a );
				4680	shiftCount = 0x403E - aExp;
				4681	if ( shiftCount <= 0 ) {
				4682	if ( shiftCount ) {
				4683	float_raise( float_flag_invalid STATUS_VAR);
				4684	if ( ! aSign
				4685	\|\| ( ( aExp == 0x7FFF )
				4686	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				4687	) {
				4688	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4689	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4690	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4691	}
				4692	aSigExtra = 0;
				4693	}
				4694	else {
				4695	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				4696	}
				4697	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				4698
				4699	}
				4700
				4701	/*----------------------------------------------------------------------------
				4702	\| Returns the result of converting the extended double-precision floating-
				4703	\| point value `a' to the 64-bit two's complement integer format. The
				4704	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4705	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4706	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4707	\| Otherwise, if the conversion overflows, the largest integer with the same
				4708	\| sign as `a' is returned.
				4709	----------------------------------------------------------------------------/
				4710
				4711	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				4712	{
				4713	flag aSign;
				4714	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4715	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4716	int64 z;
				4717
				4718	aSig = extractFloatx80Frac( a );
				4719	aExp = extractFloatx80Exp( a );
				4720	aSign = extractFloatx80Sign( a );
				4721	shiftCount = aExp - 0x403E;
				4722	if ( 0 <= shiftCount ) {
				4723	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				4724	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				4725	float_raise( float_flag_invalid STATUS_VAR);
				4726	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				4727	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4728	}
				4729	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4730	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4731	}
				4732	else if ( aExp < 0x3FFF ) {
				4733	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4734	return 0;
				4735	}
				4736	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4737	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4738	STATUS(float_exception_flags) \|= float_flag_inexact;
				4739	}
				4740	if ( aSign ) z = - z;
				4741	return z;
				4742
				4743	}
				4744
				4745	/*----------------------------------------------------------------------------
				4746	\| Returns the result of converting the extended double-precision floating-
				4747	\| point value `a' to the single-precision floating-point format. The
				4748	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4749	\| Floating-Point Arithmetic.
				4750	----------------------------------------------------------------------------/
				4751
				4752	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				4753	{
				4754	flag aSign;
				4755	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4756	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4757
				4758	aSig = extractFloatx80Frac( a );
				4759	aExp = extractFloatx80Exp( a );
				4760	aSign = extractFloatx80Sign( a );
				4761	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4762	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4763	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4764	}
				4765	return packFloat32( aSign, 0xFF, 0 );
				4766	}
				4767	shift64RightJamming( aSig, 33, &aSig );
				4768	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				4769	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				4770
				4771	}
				4772
				4773	/*----------------------------------------------------------------------------
				4774	\| Returns the result of converting the extended double-precision floating-
				4775	\| point value `a' to the double-precision floating-point format. The
				4776	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4777	\| Floating-Point Arithmetic.
				4778	----------------------------------------------------------------------------/
				4779
				4780	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				4781	{
				4782	flag aSign;
				4783	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4784	uint64_t aSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4785
				4786	aSig = extractFloatx80Frac( a );
				4787	aExp = extractFloatx80Exp( a );
				4788	aSign = extractFloatx80Sign( a );
				4789	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4790	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4791	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4792	}
				4793	return packFloat64( aSign, 0x7FF, 0 );
				4794	}
				4795	shift64RightJamming( aSig, 1, &zSig );
				4796	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				4797	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				4798
				4799	}
				4800
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4801	/*----------------------------------------------------------------------------
				4802	\| Returns the result of converting the extended double-precision floating-
				4803	\| point value `a' to the quadruple-precision floating-point format. The
				4804	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4805	\| Floating-Point Arithmetic.
				4806	----------------------------------------------------------------------------/
				4807
				4808	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				4809	{
				4810	flag aSign;
Andreas Färber	94a49d8	2012-04-26 00:15:56 +0200	[diff] [blame]	4811	int_fast16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4812	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4813
				4814	aSig = extractFloatx80Frac( a );
				4815	aExp = extractFloatx80Exp( a );
				4816	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4817	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4818	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4819	}
				4820	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				4821	return packFloat128( aSign, aExp, zSig0, zSig1 );
				4822
				4823	}
				4824
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4825	/*----------------------------------------------------------------------------
				4826	\| Rounds the extended double-precision floating-point value `a' to an integer,
				4827	\| and returns the result as an extended quadruple-precision floating-point
				4828	\| value. The operation is performed according to the IEC/IEEE Standard for
				4829	\| Binary Floating-Point Arithmetic.
				4830	----------------------------------------------------------------------------/
				4831
				4832	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				4833	{
				4834	flag aSign;
				4835	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4836	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4837	floatx80 z;
				4838
				4839	aExp = extractFloatx80Exp( a );
				4840	if ( 0x403E <= aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4841	if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4842	return propagateFloatx80NaN( a, a STATUS_VAR );
				4843	}
				4844	return a;
				4845	}
				4846	if ( aExp < 0x3FFF ) {
				4847	if ( ( aExp == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4848	&& ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4849	return a;
				4850	}
				4851	STATUS(float_exception_flags) \|= float_flag_inexact;
				4852	aSign = extractFloatx80Sign( a );
				4853	switch ( STATUS(float_rounding_mode) ) {
				4854	case float_round_nearest_even:
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4855	if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4856	) {
				4857	return
				4858	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4859	}
				4860	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	4861	case float_round_ties_away:
				4862	if (aExp == 0x3FFE) {
				4863	return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
				4864	}
				4865	break;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4866	case float_round_down:
				4867	return
				4868	aSign ?
				4869	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				4870	: packFloatx80( 0, 0, 0 );
				4871	case float_round_up:
				4872	return
				4873	aSign ? packFloatx80( 1, 0, 0 )
				4874	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4875	}
				4876	return packFloatx80( aSign, 0, 0 );
				4877	}
				4878	lastBitMask = 1;
				4879	lastBitMask <<= 0x403E - aExp;
				4880	roundBitsMask = lastBitMask - 1;
				4881	z = a;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	4882	switch (STATUS(float_rounding_mode)) {
				4883	case float_round_nearest_even:
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4884	z.low += lastBitMask>>1;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	4885	if ((z.low & roundBitsMask) == 0) {
				4886	z.low &= ~lastBitMask;
				4887	}
				4888	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	4889	case float_round_ties_away:
				4890	z.low += lastBitMask >> 1;
				4891	break;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	4892	case float_round_to_zero:
				4893	break;
				4894	case float_round_up:
				4895	if (!extractFloatx80Sign(z)) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4896	z.low += roundBitsMask;
				4897	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	4898	break;
				4899	case float_round_down:
				4900	if (extractFloatx80Sign(z)) {
				4901	z.low += roundBitsMask;
				4902	}
				4903	break;
				4904	default:
				4905	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4906	}
				4907	z.low &= ~ roundBitsMask;
				4908	if ( z.low == 0 ) {
				4909	++z.high;
				4910	z.low = LIT64( 0x8000000000000000 );
				4911	}
				4912	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4913	return z;
				4914
				4915	}
				4916
				4917	/*----------------------------------------------------------------------------
				4918	\| Returns the result of adding the absolute values of the extended double-
				4919	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				4920	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				4921	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4922	\| Floating-Point Arithmetic.
				4923	----------------------------------------------------------------------------/
				4924
				4925	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				4926	{
				4927	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4928	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4929	int32 expDiff;
				4930
				4931	aSig = extractFloatx80Frac( a );
				4932	aExp = extractFloatx80Exp( a );
				4933	bSig = extractFloatx80Frac( b );
				4934	bExp = extractFloatx80Exp( b );
				4935	expDiff = aExp - bExp;
				4936	if ( 0 < expDiff ) {
				4937	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4938	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4939	return a;
				4940	}
				4941	if ( bExp == 0 ) --expDiff;
				4942	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				4943	zExp = aExp;
				4944	}
				4945	else if ( expDiff < 0 ) {
				4946	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4947	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4948	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4949	}
				4950	if ( aExp == 0 ) ++expDiff;
				4951	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				4952	zExp = bExp;
				4953	}
				4954	else {
				4955	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4956	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4957	return propagateFloatx80NaN( a, b STATUS_VAR );
				4958	}
				4959	return a;
				4960	}
				4961	zSig1 = 0;
				4962	zSig0 = aSig + bSig;
				4963	if ( aExp == 0 ) {
				4964	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				4965	goto roundAndPack;
				4966	}
				4967	zExp = aExp;
				4968	goto shiftRight1;
				4969	}
				4970	zSig0 = aSig + bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4971	if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4972	shiftRight1:
				4973	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				4974	zSig0 \|= LIT64( 0x8000000000000000 );
				4975	++zExp;
				4976	roundAndPack:
				4977	return
				4978	roundAndPackFloatx80(
				4979	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4980
				4981	}
				4982
				4983	/*----------------------------------------------------------------------------
				4984	\| Returns the result of subtracting the absolute values of the extended
				4985	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				4986	\| difference is negated before being returned. `zSign' is ignored if the
				4987	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4988	\| Standard for Binary Floating-Point Arithmetic.
				4989	----------------------------------------------------------------------------/
				4990
				4991	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				4992	{
				4993	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4994	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4995	int32 expDiff;
				4996	floatx80 z;
				4997
				4998	aSig = extractFloatx80Frac( a );
				4999	aExp = extractFloatx80Exp( a );
				5000	bSig = extractFloatx80Frac( b );
				5001	bExp = extractFloatx80Exp( b );
				5002	expDiff = aExp - bExp;
				5003	if ( 0 < expDiff ) goto aExpBigger;
				5004	if ( expDiff < 0 ) goto bExpBigger;
				5005	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5006	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5007	return propagateFloatx80NaN( a, b STATUS_VAR );
				5008	}
				5009	float_raise( float_flag_invalid STATUS_VAR);
				5010	z.low = floatx80_default_nan_low;
				5011	z.high = floatx80_default_nan_high;
				5012	return z;
				5013	}
				5014	if ( aExp == 0 ) {
				5015	aExp = 1;
				5016	bExp = 1;
				5017	}
				5018	zSig1 = 0;
				5019	if ( bSig < aSig ) goto aBigger;
				5020	if ( aSig < bSig ) goto bBigger;
				5021	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				5022	bExpBigger:
				5023	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5024	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5025	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5026	}
				5027	if ( aExp == 0 ) ++expDiff;
				5028	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				5029	bBigger:
				5030	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				5031	zExp = bExp;
				5032	zSign ^= 1;
				5033	goto normalizeRoundAndPack;
				5034	aExpBigger:
				5035	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5036	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5037	return a;
				5038	}
				5039	if ( bExp == 0 ) --expDiff;
				5040	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				5041	aBigger:
				5042	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				5043	zExp = aExp;
				5044	normalizeRoundAndPack:
				5045	return
				5046	normalizeRoundAndPackFloatx80(
				5047	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				5048
				5049	}
				5050
				5051	/*----------------------------------------------------------------------------
				5052	\| Returns the result of adding the extended double-precision floating-point
				5053	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				5054	\| Standard for Binary Floating-Point Arithmetic.
				5055	----------------------------------------------------------------------------/
				5056
				5057	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				5058	{
				5059	flag aSign, bSign;
				5060
				5061	aSign = extractFloatx80Sign( a );
				5062	bSign = extractFloatx80Sign( b );
				5063	if ( aSign == bSign ) {
				5064	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				5065	}
				5066	else {
				5067	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				5068	}
				5069
				5070	}
				5071
				5072	/*----------------------------------------------------------------------------
				5073	\| Returns the result of subtracting the extended double-precision floating-
				5074	\| point values `a' and `b'. The operation is performed according to the
				5075	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5076	----------------------------------------------------------------------------/
				5077
				5078	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				5079	{
				5080	flag aSign, bSign;
				5081
				5082	aSign = extractFloatx80Sign( a );
				5083	bSign = extractFloatx80Sign( b );
				5084	if ( aSign == bSign ) {
				5085	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				5086	}
				5087	else {
				5088	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				5089	}
				5090
				5091	}
				5092
				5093	/*----------------------------------------------------------------------------
				5094	\| Returns the result of multiplying the extended double-precision floating-
				5095	\| point values `a' and `b'. The operation is performed according to the
				5096	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5097	----------------------------------------------------------------------------/
				5098
				5099	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				5100	{
				5101	flag aSign, bSign, zSign;
				5102	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5103	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5104	floatx80 z;
				5105
				5106	aSig = extractFloatx80Frac( a );
				5107	aExp = extractFloatx80Exp( a );
				5108	aSign = extractFloatx80Sign( a );
				5109	bSig = extractFloatx80Frac( b );
				5110	bExp = extractFloatx80Exp( b );
				5111	bSign = extractFloatx80Sign( b );
				5112	zSign = aSign ^ bSign;
				5113	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5114	if ( (uint64_t) ( aSig<<1 )
				5115	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5116	return propagateFloatx80NaN( a, b STATUS_VAR );
				5117	}
				5118	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				5119	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5120	}
				5121	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5122	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5123	if ( ( aExp \| aSig ) == 0 ) {
				5124	invalid:
				5125	float_raise( float_flag_invalid STATUS_VAR);
				5126	z.low = floatx80_default_nan_low;
				5127	z.high = floatx80_default_nan_high;
				5128	return z;
				5129	}
				5130	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5131	}
				5132	if ( aExp == 0 ) {
				5133	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				5134	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				5135	}
				5136	if ( bExp == 0 ) {
				5137	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				5138	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				5139	}
				5140	zExp = aExp + bExp - 0x3FFE;
				5141	mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5142	if ( 0 < (int64_t) zSig0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5143	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				5144	--zExp;
				5145	}
				5146	return
				5147	roundAndPackFloatx80(
				5148	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				5149
				5150	}
				5151
				5152	/*----------------------------------------------------------------------------
				5153	\| Returns the result of dividing the extended double-precision floating-point
				5154	\| value `a' by the corresponding value `b'. The operation is performed
				5155	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5156	----------------------------------------------------------------------------/
				5157
				5158	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				5159	{
				5160	flag aSign, bSign, zSign;
				5161	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5162	uint64_t aSig, bSig, zSig0, zSig1;
				5163	uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5164	floatx80 z;
				5165
				5166	aSig = extractFloatx80Frac( a );
				5167	aExp = extractFloatx80Exp( a );
				5168	aSign = extractFloatx80Sign( a );
				5169	bSig = extractFloatx80Frac( b );
				5170	bExp = extractFloatx80Exp( b );
				5171	bSign = extractFloatx80Sign( b );
				5172	zSign = aSign ^ bSign;
				5173	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5174	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5175	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5176	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5177	goto invalid;
				5178	}
				5179	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5180	}
				5181	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5182	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5183	return packFloatx80( zSign, 0, 0 );
				5184	}
				5185	if ( bExp == 0 ) {
				5186	if ( bSig == 0 ) {
				5187	if ( ( aExp \| aSig ) == 0 ) {
				5188	invalid:
				5189	float_raise( float_flag_invalid STATUS_VAR);
				5190	z.low = floatx80_default_nan_low;
				5191	z.high = floatx80_default_nan_high;
				5192	return z;
				5193	}
				5194	float_raise( float_flag_divbyzero STATUS_VAR);
				5195	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5196	}
				5197	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				5198	}
				5199	if ( aExp == 0 ) {
				5200	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				5201	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				5202	}
				5203	zExp = aExp - bExp + 0x3FFE;
				5204	rem1 = 0;
				5205	if ( bSig <= aSig ) {
				5206	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				5207	++zExp;
				5208	}
				5209	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				5210	mul64To128( bSig, zSig0, &term0, &term1 );
				5211	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5212	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5213	--zSig0;
				5214	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				5215	}
				5216	zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5217	if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5218	mul64To128( bSig, zSig1, &term1, &term2 );
				5219	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5220	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5221	--zSig1;
				5222	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				5223	}
				5224	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				5225	}
				5226	return
				5227	roundAndPackFloatx80(
				5228	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				5229
				5230	}
				5231
				5232	/*----------------------------------------------------------------------------
				5233	\| Returns the remainder of the extended double-precision floating-point value
				5234	\| `a' with respect to the corresponding value `b'. The operation is performed
				5235	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5236	----------------------------------------------------------------------------/
				5237
				5238	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				5239	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	5240	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5241	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5242	uint64_t aSig0, aSig1, bSig;
				5243	uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5244	floatx80 z;
				5245
				5246	aSig0 = extractFloatx80Frac( a );
				5247	aExp = extractFloatx80Exp( a );
				5248	aSign = extractFloatx80Sign( a );
				5249	bSig = extractFloatx80Frac( b );
				5250	bExp = extractFloatx80Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5251	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5252	if ( (uint64_t) ( aSig0<<1 )
				5253	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5254	return propagateFloatx80NaN( a, b STATUS_VAR );
				5255	}
				5256	goto invalid;
				5257	}
				5258	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5259	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5260	return a;
				5261	}
				5262	if ( bExp == 0 ) {
				5263	if ( bSig == 0 ) {
				5264	invalid:
				5265	float_raise( float_flag_invalid STATUS_VAR);
				5266	z.low = floatx80_default_nan_low;
				5267	z.high = floatx80_default_nan_high;
				5268	return z;
				5269	}
				5270	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				5271	}
				5272	if ( aExp == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5273	if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5274	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				5275	}
				5276	bSig \|= LIT64( 0x8000000000000000 );
				5277	zSign = aSign;
				5278	expDiff = aExp - bExp;
				5279	aSig1 = 0;
				5280	if ( expDiff < 0 ) {
				5281	if ( expDiff < -1 ) return a;
				5282	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				5283	expDiff = 0;
				5284	}
				5285	q = ( bSig <= aSig0 );
				5286	if ( q ) aSig0 -= bSig;
				5287	expDiff -= 64;
				5288	while ( 0 < expDiff ) {
				5289	q = estimateDiv128To64( aSig0, aSig1, bSig );
				5290	q = ( 2 < q ) ? q - 2 : 0;
				5291	mul64To128( bSig, q, &term0, &term1 );
				5292	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				5293	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				5294	expDiff -= 62;
				5295	}
				5296	expDiff += 64;
				5297	if ( 0 < expDiff ) {
				5298	q = estimateDiv128To64( aSig0, aSig1, bSig );
				5299	q = ( 2 < q ) ? q - 2 : 0;
				5300	q >>= 64 - expDiff;
				5301	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				5302	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				5303	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				5304	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				5305	++q;
				5306	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				5307	}
				5308	}
				5309	else {
				5310	term1 = 0;
				5311	term0 = bSig;
				5312	}
				5313	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				5314	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				5315	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				5316	&& ( q & 1 ) )
				5317	) {
				5318	aSig0 = alternateASig0;
				5319	aSig1 = alternateASig1;
				5320	zSign = ! zSign;
				5321	}
				5322	return
				5323	normalizeRoundAndPackFloatx80(
				5324	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				5325
				5326	}
				5327
				5328	/*----------------------------------------------------------------------------
				5329	\| Returns the square root of the extended double-precision floating-point
				5330	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				5331	\| for Binary Floating-Point Arithmetic.
				5332	----------------------------------------------------------------------------/
				5333
				5334	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				5335	{
				5336	flag aSign;
				5337	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5338	uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				5339	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5340	floatx80 z;
				5341
				5342	aSig0 = extractFloatx80Frac( a );
				5343	aExp = extractFloatx80Exp( a );
				5344	aSign = extractFloatx80Sign( a );
				5345	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5346	if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5347	if ( ! aSign ) return a;
				5348	goto invalid;
				5349	}
				5350	if ( aSign ) {
				5351	if ( ( aExp \| aSig0 ) == 0 ) return a;
				5352	invalid:
				5353	float_raise( float_flag_invalid STATUS_VAR);
				5354	z.low = floatx80_default_nan_low;
				5355	z.high = floatx80_default_nan_high;
				5356	return z;
				5357	}
				5358	if ( aExp == 0 ) {
				5359	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				5360	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				5361	}
				5362	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				5363	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				5364	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				5365	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				5366	doubleZSig0 = zSig0<<1;
				5367	mul64To128( zSig0, zSig0, &term0, &term1 );
				5368	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5369	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5370	--zSig0;
				5371	doubleZSig0 -= 2;
				5372	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				5373	}
				5374	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				5375	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				5376	if ( zSig1 == 0 ) zSig1 = 1;
				5377	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				5378	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				5379	mul64To128( zSig1, zSig1, &term2, &term3 );
				5380	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5381	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5382	--zSig1;
				5383	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				5384	term3 \|= 1;
				5385	term2 \|= doubleZSig0;
				5386	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				5387	}
				5388	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5389	}
				5390	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				5391	zSig0 \|= doubleZSig0;
				5392	return
				5393	roundAndPackFloatx80(
				5394	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				5395
				5396	}
				5397
				5398	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5399	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				5400	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				5401	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5402	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5403	----------------------------------------------------------------------------/
				5404
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5405	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5406	{
				5407
				5408	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5409	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5410	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5411	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5412	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5413	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5414	return 0;
				5415	}
				5416	return
				5417	( a.low == b.low )
				5418	&& ( ( a.high == b.high )
				5419	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5420	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5421	);
				5422
				5423	}
				5424
				5425	/*----------------------------------------------------------------------------
				5426	\| Returns 1 if the extended double-precision floating-point value `a' is
				5427	\| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5428	\| invalid exception is raised if either operand is a NaN. The comparison is
				5429	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				5430	\| Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5431	----------------------------------------------------------------------------/
				5432
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5433	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5434	{
				5435	flag aSign, bSign;
				5436
				5437	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5438	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5439	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5440	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5441	) {
				5442	float_raise( float_flag_invalid STATUS_VAR);
				5443	return 0;
				5444	}
				5445	aSign = extractFloatx80Sign( a );
				5446	bSign = extractFloatx80Sign( b );
				5447	if ( aSign != bSign ) {
				5448	return
				5449	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5450	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5451	== 0 );
				5452	}
				5453	return
				5454	aSign ? le128( b.high, b.low, a.high, a.low )
				5455	: le128( a.high, a.low, b.high, b.low );
				5456
				5457	}
				5458
				5459	/*----------------------------------------------------------------------------
				5460	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5461	\| less than the corresponding value `b', and 0 otherwise. The invalid
				5462	\| exception is raised if either operand is a NaN. The comparison is performed
				5463	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5464	----------------------------------------------------------------------------/
				5465
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5466	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5467	{
				5468	flag aSign, bSign;
				5469
				5470	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5471	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5472	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5473	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5474	) {
				5475	float_raise( float_flag_invalid STATUS_VAR);
				5476	return 0;
				5477	}
				5478	aSign = extractFloatx80Sign( a );
				5479	bSign = extractFloatx80Sign( b );
				5480	if ( aSign != bSign ) {
				5481	return
				5482	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5483	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5484	!= 0 );
				5485	}
				5486	return
				5487	aSign ? lt128( b.high, b.low, a.high, a.low )
				5488	: lt128( a.high, a.low, b.high, b.low );
				5489
				5490	}
				5491
				5492	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5493	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5494	\| cannot be compared, and 0 otherwise. The invalid exception is raised if
				5495	\| either operand is a NaN. The comparison is performed according to the
				5496	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5497	----------------------------------------------------------------------------/
				5498	int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
				5499	{
				5500	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5501	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5502	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5503	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5504	) {
				5505	float_raise( float_flag_invalid STATUS_VAR);
				5506	return 1;
				5507	}
				5508	return 0;
				5509	}
				5510
				5511	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5512	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5513	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5514	\| cause an exception. The comparison is performed according to the IEC/IEEE
				5515	\| Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5516	----------------------------------------------------------------------------/
				5517
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5518	int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5519	{
				5520
				5521	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5522	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5523	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5524	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5525	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5526	if ( floatx80_is_signaling_nan( a )
				5527	\|\| floatx80_is_signaling_nan( b ) ) {
				5528	float_raise( float_flag_invalid STATUS_VAR);
				5529	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5530	return 0;
				5531	}
				5532	return
				5533	( a.low == b.low )
				5534	&& ( ( a.high == b.high )
				5535	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5536	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5537	);
				5538
				5539	}
				5540
				5541	/*----------------------------------------------------------------------------
				5542	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5543	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				5544	\| do not cause an exception. Otherwise, the comparison is performed according
				5545	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5546	----------------------------------------------------------------------------/
				5547
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5548	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5549	{
				5550	flag aSign, bSign;
				5551
				5552	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5553	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5554	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5555	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5556	) {
				5557	if ( floatx80_is_signaling_nan( a )
				5558	\|\| floatx80_is_signaling_nan( b ) ) {
				5559	float_raise( float_flag_invalid STATUS_VAR);
				5560	}
				5561	return 0;
				5562	}
				5563	aSign = extractFloatx80Sign( a );
				5564	bSign = extractFloatx80Sign( b );
				5565	if ( aSign != bSign ) {
				5566	return
				5567	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5568	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5569	== 0 );
				5570	}
				5571	return
				5572	aSign ? le128( b.high, b.low, a.high, a.low )
				5573	: le128( a.high, a.low, b.high, b.low );
				5574
				5575	}
				5576
				5577	/*----------------------------------------------------------------------------
				5578	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5579	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				5580	\| an exception. Otherwise, the comparison is performed according to the
				5581	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5582	----------------------------------------------------------------------------/
				5583
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5584	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5585	{
				5586	flag aSign, bSign;
				5587
				5588	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5589	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5590	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5591	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5592	) {
				5593	if ( floatx80_is_signaling_nan( a )
				5594	\|\| floatx80_is_signaling_nan( b ) ) {
				5595	float_raise( float_flag_invalid STATUS_VAR);
				5596	}
				5597	return 0;
				5598	}
				5599	aSign = extractFloatx80Sign( a );
				5600	bSign = extractFloatx80Sign( b );
				5601	if ( aSign != bSign ) {
				5602	return
				5603	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5604	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5605	!= 0 );
				5606	}
				5607	return
				5608	aSign ? lt128( b.high, b.low, a.high, a.low )
				5609	: lt128( a.high, a.low, b.high, b.low );
				5610
				5611	}
				5612
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5613	/*----------------------------------------------------------------------------
				5614	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
				5615	\| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
				5616	\| The comparison is performed according to the IEC/IEEE Standard for Binary
				5617	\| Floating-Point Arithmetic.
				5618	----------------------------------------------------------------------------/
				5619	int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				5620	{
				5621	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5622	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5623	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5624	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5625	) {
				5626	if ( floatx80_is_signaling_nan( a )
				5627	\|\| floatx80_is_signaling_nan( b ) ) {
				5628	float_raise( float_flag_invalid STATUS_VAR);
				5629	}
				5630	return 1;
				5631	}
				5632	return 0;
				5633	}
				5634
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5635	/*----------------------------------------------------------------------------
				5636	\| Returns the result of converting the quadruple-precision floating-point
				5637	\| value `a' to the 32-bit two's complement integer format. The conversion
				5638	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5639	\| Arithmetic---which means in particular that the conversion is rounded
				5640	\| according to the current rounding mode. If `a' is a NaN, the largest
				5641	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5642	\| largest integer with the same sign as `a' is returned.
				5643	----------------------------------------------------------------------------/
				5644
				5645	int32 float128_to_int32( float128 a STATUS_PARAM )
				5646	{
				5647	flag aSign;
				5648	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5649	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5650
				5651	aSig1 = extractFloat128Frac1( a );
				5652	aSig0 = extractFloat128Frac0( a );
				5653	aExp = extractFloat128Exp( a );
				5654	aSign = extractFloat128Sign( a );
				5655	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				5656	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5657	aSig0 \|= ( aSig1 != 0 );
				5658	shiftCount = 0x4028 - aExp;
				5659	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				5660	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				5661
				5662	}
				5663
				5664	/*----------------------------------------------------------------------------
				5665	\| Returns the result of converting the quadruple-precision floating-point
				5666	\| value `a' to the 32-bit two's complement integer format. The conversion
				5667	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5668	\| Arithmetic, except that the conversion is always rounded toward zero. If
				5669	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				5670	\| conversion overflows, the largest integer with the same sign as `a' is
				5671	\| returned.
				5672	----------------------------------------------------------------------------/
				5673
				5674	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				5675	{
				5676	flag aSign;
				5677	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5678	uint64_t aSig0, aSig1, savedASig;
Peter Maydell	b3a6a2e	2012-04-05 19:12:34 +0100	[diff] [blame]	5679	int32_t z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5680
				5681	aSig1 = extractFloat128Frac1( a );
				5682	aSig0 = extractFloat128Frac0( a );
				5683	aExp = extractFloat128Exp( a );
				5684	aSign = extractFloat128Sign( a );
				5685	aSig0 \|= ( aSig1 != 0 );
				5686	if ( 0x401E < aExp ) {
				5687	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				5688	goto invalid;
				5689	}
				5690	else if ( aExp < 0x3FFF ) {
				5691	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5692	return 0;
				5693	}
				5694	aSig0 \|= LIT64( 0x0001000000000000 );
				5695	shiftCount = 0x402F - aExp;
				5696	savedASig = aSig0;
				5697	aSig0 >>= shiftCount;
				5698	z = aSig0;
				5699	if ( aSign ) z = - z;
				5700	if ( ( z < 0 ) ^ aSign ) {
				5701	invalid:
				5702	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5703	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5704	}
				5705	if ( ( aSig0<<shiftCount ) != savedASig ) {
				5706	STATUS(float_exception_flags) \|= float_flag_inexact;
				5707	}
				5708	return z;
				5709
				5710	}
				5711
				5712	/*----------------------------------------------------------------------------
				5713	\| Returns the result of converting the quadruple-precision floating-point
				5714	\| value `a' to the 64-bit two's complement integer format. The conversion
				5715	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5716	\| Arithmetic---which means in particular that the conversion is rounded
				5717	\| according to the current rounding mode. If `a' is a NaN, the largest
				5718	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5719	\| largest integer with the same sign as `a' is returned.
				5720	----------------------------------------------------------------------------/
				5721
				5722	int64 float128_to_int64( float128 a STATUS_PARAM )
				5723	{
				5724	flag aSign;
				5725	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5726	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5727
				5728	aSig1 = extractFloat128Frac1( a );
				5729	aSig0 = extractFloat128Frac0( a );
				5730	aExp = extractFloat128Exp( a );
				5731	aSign = extractFloat128Sign( a );
				5732	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5733	shiftCount = 0x402F - aExp;
				5734	if ( shiftCount <= 0 ) {
				5735	if ( 0x403E < aExp ) {
				5736	float_raise( float_flag_invalid STATUS_VAR);
				5737	if ( ! aSign
				5738	\|\| ( ( aExp == 0x7FFF )
				5739	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				5740	)
				5741	) {
				5742	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5743	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5744	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5745	}
				5746	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				5747	}
				5748	else {
				5749	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				5750	}
				5751	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				5752
				5753	}
				5754
				5755	/*----------------------------------------------------------------------------
				5756	\| Returns the result of converting the quadruple-precision floating-point
				5757	\| value `a' to the 64-bit two's complement integer format. The conversion
				5758	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5759	\| Arithmetic, except that the conversion is always rounded toward zero.
				5760	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				5761	\| the conversion overflows, the largest integer with the same sign as `a' is
				5762	\| returned.
				5763	----------------------------------------------------------------------------/
				5764
				5765	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				5766	{
				5767	flag aSign;
				5768	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5769	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5770	int64 z;
				5771
				5772	aSig1 = extractFloat128Frac1( a );
				5773	aSig0 = extractFloat128Frac0( a );
				5774	aExp = extractFloat128Exp( a );
				5775	aSign = extractFloat128Sign( a );
				5776	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5777	shiftCount = aExp - 0x402F;
				5778	if ( 0 < shiftCount ) {
				5779	if ( 0x403E <= aExp ) {
				5780	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				5781	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				5782	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				5783	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5784	}
				5785	else {
				5786	float_raise( float_flag_invalid STATUS_VAR);
				5787	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				5788	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5789	}
				5790	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5791	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5792	}
				5793	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5794	if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5795	STATUS(float_exception_flags) \|= float_flag_inexact;
				5796	}
				5797	}
				5798	else {
				5799	if ( aExp < 0x3FFF ) {
				5800	if ( aExp \| aSig0 \| aSig1 ) {
				5801	STATUS(float_exception_flags) \|= float_flag_inexact;
				5802	}
				5803	return 0;
				5804	}
				5805	z = aSig0>>( - shiftCount );
				5806	if ( aSig1
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5807	\|\| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5808	STATUS(float_exception_flags) \|= float_flag_inexact;
				5809	}
				5810	}
				5811	if ( aSign ) z = - z;
				5812	return z;
				5813
				5814	}
				5815
				5816	/*----------------------------------------------------------------------------
				5817	\| Returns the result of converting the quadruple-precision floating-point
				5818	\| value `a' to the single-precision floating-point format. The conversion
				5819	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5820	\| Arithmetic.
				5821	----------------------------------------------------------------------------/
				5822
				5823	float32 float128_to_float32( float128 a STATUS_PARAM )
				5824	{
				5825	flag aSign;
				5826	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5827	uint64_t aSig0, aSig1;
				5828	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5829
				5830	aSig1 = extractFloat128Frac1( a );
				5831	aSig0 = extractFloat128Frac0( a );
				5832	aExp = extractFloat128Exp( a );
				5833	aSign = extractFloat128Sign( a );
				5834	if ( aExp == 0x7FFF ) {
				5835	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5836	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5837	}
				5838	return packFloat32( aSign, 0xFF, 0 );
				5839	}
				5840	aSig0 \|= ( aSig1 != 0 );
				5841	shift64RightJamming( aSig0, 18, &aSig0 );
				5842	zSig = aSig0;
				5843	if ( aExp \|\| zSig ) {
				5844	zSig \|= 0x40000000;
				5845	aExp -= 0x3F81;
				5846	}
				5847	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				5848
				5849	}
				5850
				5851	/*----------------------------------------------------------------------------
				5852	\| Returns the result of converting the quadruple-precision floating-point
				5853	\| value `a' to the double-precision floating-point format. The conversion
				5854	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5855	\| Arithmetic.
				5856	----------------------------------------------------------------------------/
				5857
				5858	float64 float128_to_float64( float128 a STATUS_PARAM )
				5859	{
				5860	flag aSign;
				5861	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5862	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5863
				5864	aSig1 = extractFloat128Frac1( a );
				5865	aSig0 = extractFloat128Frac0( a );
				5866	aExp = extractFloat128Exp( a );
				5867	aSign = extractFloat128Sign( a );
				5868	if ( aExp == 0x7FFF ) {
				5869	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5870	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5871	}
				5872	return packFloat64( aSign, 0x7FF, 0 );
				5873	}
				5874	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				5875	aSig0 \|= ( aSig1 != 0 );
				5876	if ( aExp \|\| aSig0 ) {
				5877	aSig0 \|= LIT64( 0x4000000000000000 );
				5878	aExp -= 0x3C01;
				5879	}
				5880	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				5881
				5882	}
				5883
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5884	/*----------------------------------------------------------------------------
				5885	\| Returns the result of converting the quadruple-precision floating-point
				5886	\| value `a' to the extended double-precision floating-point format. The
				5887	\| conversion is performed according to the IEC/IEEE Standard for Binary
				5888	\| Floating-Point Arithmetic.
				5889	----------------------------------------------------------------------------/
				5890
				5891	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				5892	{
				5893	flag aSign;
				5894	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5895	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5896
				5897	aSig1 = extractFloat128Frac1( a );
				5898	aSig0 = extractFloat128Frac0( a );
				5899	aExp = extractFloat128Exp( a );
				5900	aSign = extractFloat128Sign( a );
				5901	if ( aExp == 0x7FFF ) {
				5902	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5903	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5904	}
				5905	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5906	}
				5907	if ( aExp == 0 ) {
				5908	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				5909	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5910	}
				5911	else {
				5912	aSig0 \|= LIT64( 0x0001000000000000 );
				5913	}
				5914	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				5915	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				5916
				5917	}
				5918
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5919	/*----------------------------------------------------------------------------
				5920	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				5921	\| returns the result as a quadruple-precision floating-point value. The
				5922	\| operation is performed according to the IEC/IEEE Standard for Binary
				5923	\| Floating-Point Arithmetic.
				5924	----------------------------------------------------------------------------/
				5925
				5926	float128 float128_round_to_int( float128 a STATUS_PARAM )
				5927	{
				5928	flag aSign;
				5929	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5930	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5931	float128 z;
				5932
				5933	aExp = extractFloat128Exp( a );
				5934	if ( 0x402F <= aExp ) {
				5935	if ( 0x406F <= aExp ) {
				5936	if ( ( aExp == 0x7FFF )
				5937	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				5938	) {
				5939	return propagateFloat128NaN( a, a STATUS_VAR );
				5940	}
				5941	return a;
				5942	}
				5943	lastBitMask = 1;
				5944	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				5945	roundBitsMask = lastBitMask - 1;
				5946	z = a;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	5947	switch (STATUS(float_rounding_mode)) {
				5948	case float_round_nearest_even:
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5949	if ( lastBitMask ) {
				5950	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				5951	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				5952	}
				5953	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5954	if ( (int64_t) z.low < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5955	++z.high;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5956	if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5957	}
				5958	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	5959	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	5960	case float_round_ties_away:
				5961	if (lastBitMask) {
				5962	add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
				5963	} else {
				5964	if ((int64_t) z.low < 0) {
				5965	++z.high;
				5966	}
				5967	}
				5968	break;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	5969	case float_round_to_zero:
				5970	break;
				5971	case float_round_up:
				5972	if (!extractFloat128Sign(z)) {
				5973	add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5974	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	5975	break;
				5976	case float_round_down:
				5977	if (extractFloat128Sign(z)) {
				5978	add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
				5979	}
				5980	break;
				5981	default:
				5982	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5983	}
				5984	z.low &= ~ roundBitsMask;
				5985	}
				5986	else {
				5987	if ( aExp < 0x3FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5988	if ( ( ( (uint64_t) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5989	STATUS(float_exception_flags) \|= float_flag_inexact;
				5990	aSign = extractFloat128Sign( a );
				5991	switch ( STATUS(float_rounding_mode) ) {
				5992	case float_round_nearest_even:
				5993	if ( ( aExp == 0x3FFE )
				5994	&& ( extractFloat128Frac0( a )
				5995	\| extractFloat128Frac1( a ) )
				5996	) {
				5997	return packFloat128( aSign, 0x3FFF, 0, 0 );
				5998	}
				5999	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	6000	case float_round_ties_away:
				6001	if (aExp == 0x3FFE) {
				6002	return packFloat128(aSign, 0x3FFF, 0, 0);
				6003	}
				6004	break;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6005	case float_round_down:
				6006	return
				6007	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				6008	: packFloat128( 0, 0, 0, 0 );
				6009	case float_round_up:
				6010	return
				6011	aSign ? packFloat128( 1, 0, 0, 0 )
				6012	: packFloat128( 0, 0x3FFF, 0, 0 );
				6013	}
				6014	return packFloat128( aSign, 0, 0, 0 );
				6015	}
				6016	lastBitMask = 1;
				6017	lastBitMask <<= 0x402F - aExp;
				6018	roundBitsMask = lastBitMask - 1;
				6019	z.low = 0;
				6020	z.high = a.high;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	6021	switch (STATUS(float_rounding_mode)) {
				6022	case float_round_nearest_even:
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6023	z.high += lastBitMask>>1;
				6024	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				6025	z.high &= ~ lastBitMask;
				6026	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	6027	break;
Peter Maydell	f9288a7	2014-01-07 17:19:12 +0000	[diff] [blame]	6028	case float_round_ties_away:
				6029	z.high += lastBitMask>>1;
				6030	break;
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	6031	case float_round_to_zero:
				6032	break;
				6033	case float_round_up:
				6034	if (!extractFloat128Sign(z)) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6035	z.high \|= ( a.low != 0 );
				6036	z.high += roundBitsMask;
				6037	}
Peter Maydell	dc355b7	2014-01-07 17:19:12 +0000	[diff] [blame]	6038	break;
				6039	case float_round_down:
				6040	if (extractFloat128Sign(z)) {
				6041	z.high \|= (a.low != 0);
				6042	z.high += roundBitsMask;
				6043	}
				6044	break;
				6045	default:
				6046	abort();
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6047	}
				6048	z.high &= ~ roundBitsMask;
				6049	}
				6050	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				6051	STATUS(float_exception_flags) \|= float_flag_inexact;
				6052	}
				6053	return z;
				6054
				6055	}
				6056
				6057	/*----------------------------------------------------------------------------
				6058	\| Returns the result of adding the absolute values of the quadruple-precision
				6059	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				6060	\| before being returned. `zSign' is ignored if the result is a NaN.
				6061	\| The addition is performed according to the IEC/IEEE Standard for Binary
				6062	\| Floating-Point Arithmetic.
				6063	----------------------------------------------------------------------------/
				6064
				6065	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				6066	{
				6067	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6068	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6069	int32 expDiff;
				6070
				6071	aSig1 = extractFloat128Frac1( a );
				6072	aSig0 = extractFloat128Frac0( a );
				6073	aExp = extractFloat128Exp( a );
				6074	bSig1 = extractFloat128Frac1( b );
				6075	bSig0 = extractFloat128Frac0( b );
				6076	bExp = extractFloat128Exp( b );
				6077	expDiff = aExp - bExp;
				6078	if ( 0 < expDiff ) {
				6079	if ( aExp == 0x7FFF ) {
				6080	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6081	return a;
				6082	}
				6083	if ( bExp == 0 ) {
				6084	--expDiff;
				6085	}
				6086	else {
				6087	bSig0 \|= LIT64( 0x0001000000000000 );
				6088	}
				6089	shift128ExtraRightJamming(
				6090	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				6091	zExp = aExp;
				6092	}
				6093	else if ( expDiff < 0 ) {
				6094	if ( bExp == 0x7FFF ) {
				6095	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6096	return packFloat128( zSign, 0x7FFF, 0, 0 );
				6097	}
				6098	if ( aExp == 0 ) {
				6099	++expDiff;
				6100	}
				6101	else {
				6102	aSig0 \|= LIT64( 0x0001000000000000 );
				6103	}
				6104	shift128ExtraRightJamming(
				6105	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				6106	zExp = bExp;
				6107	}
				6108	else {
				6109	if ( aExp == 0x7FFF ) {
				6110	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				6111	return propagateFloat128NaN( a, b STATUS_VAR );
				6112	}
				6113	return a;
				6114	}
				6115	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	6116	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	6117	if (STATUS(flush_to_zero)) {
				6118	if (zSig0 \| zSig1) {
				6119	float_raise(float_flag_output_denormal STATUS_VAR);
				6120	}
				6121	return packFloat128(zSign, 0, 0, 0);
				6122	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	6123	return packFloat128( zSign, 0, zSig0, zSig1 );
				6124	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6125	zSig2 = 0;
				6126	zSig0 \|= LIT64( 0x0002000000000000 );
				6127	zExp = aExp;
				6128	goto shiftRight1;
				6129	}
				6130	aSig0 \|= LIT64( 0x0001000000000000 );
				6131	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				6132	--zExp;
				6133	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				6134	++zExp;
				6135	shiftRight1:
				6136	shift128ExtraRightJamming(
				6137	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				6138	roundAndPack:
				6139	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6140
				6141	}
				6142
				6143	/*----------------------------------------------------------------------------
				6144	\| Returns the result of subtracting the absolute values of the quadruple-
				6145	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				6146	\| difference is negated before being returned. `zSign' is ignored if the
				6147	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				6148	\| Standard for Binary Floating-Point Arithmetic.
				6149	----------------------------------------------------------------------------/
				6150
				6151	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				6152	{
				6153	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6154	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6155	int32 expDiff;
				6156	float128 z;
				6157
				6158	aSig1 = extractFloat128Frac1( a );
				6159	aSig0 = extractFloat128Frac0( a );
				6160	aExp = extractFloat128Exp( a );
				6161	bSig1 = extractFloat128Frac1( b );
				6162	bSig0 = extractFloat128Frac0( b );
				6163	bExp = extractFloat128Exp( b );
				6164	expDiff = aExp - bExp;
				6165	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				6166	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				6167	if ( 0 < expDiff ) goto aExpBigger;
				6168	if ( expDiff < 0 ) goto bExpBigger;
				6169	if ( aExp == 0x7FFF ) {
				6170	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				6171	return propagateFloat128NaN( a, b STATUS_VAR );
				6172	}
				6173	float_raise( float_flag_invalid STATUS_VAR);
				6174	z.low = float128_default_nan_low;
				6175	z.high = float128_default_nan_high;
				6176	return z;
				6177	}
				6178	if ( aExp == 0 ) {
				6179	aExp = 1;
				6180	bExp = 1;
				6181	}
				6182	if ( bSig0 < aSig0 ) goto aBigger;
				6183	if ( aSig0 < bSig0 ) goto bBigger;
				6184	if ( bSig1 < aSig1 ) goto aBigger;
				6185	if ( aSig1 < bSig1 ) goto bBigger;
				6186	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				6187	bExpBigger:
				6188	if ( bExp == 0x7FFF ) {
				6189	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6190	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				6191	}
				6192	if ( aExp == 0 ) {
				6193	++expDiff;
				6194	}
				6195	else {
				6196	aSig0 \|= LIT64( 0x4000000000000000 );
				6197	}
				6198	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				6199	bSig0 \|= LIT64( 0x4000000000000000 );
				6200	bBigger:
				6201	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				6202	zExp = bExp;
				6203	zSign ^= 1;
				6204	goto normalizeRoundAndPack;
				6205	aExpBigger:
				6206	if ( aExp == 0x7FFF ) {
				6207	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6208	return a;
				6209	}
				6210	if ( bExp == 0 ) {
				6211	--expDiff;
				6212	}
				6213	else {
				6214	bSig0 \|= LIT64( 0x4000000000000000 );
				6215	}
				6216	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				6217	aSig0 \|= LIT64( 0x4000000000000000 );
				6218	aBigger:
				6219	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				6220	zExp = aExp;
				6221	normalizeRoundAndPack:
				6222	--zExp;
				6223	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				6224
				6225	}
				6226
				6227	/*----------------------------------------------------------------------------
				6228	\| Returns the result of adding the quadruple-precision floating-point values
				6229	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				6230	\| for Binary Floating-Point Arithmetic.
				6231	----------------------------------------------------------------------------/
				6232
				6233	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				6234	{
				6235	flag aSign, bSign;
				6236
				6237	aSign = extractFloat128Sign( a );
				6238	bSign = extractFloat128Sign( b );
				6239	if ( aSign == bSign ) {
				6240	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				6241	}
				6242	else {
				6243	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				6244	}
				6245
				6246	}
				6247
				6248	/*----------------------------------------------------------------------------
				6249	\| Returns the result of subtracting the quadruple-precision floating-point
				6250	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				6251	\| Standard for Binary Floating-Point Arithmetic.
				6252	----------------------------------------------------------------------------/
				6253
				6254	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				6255	{
				6256	flag aSign, bSign;
				6257
				6258	aSign = extractFloat128Sign( a );
				6259	bSign = extractFloat128Sign( b );
				6260	if ( aSign == bSign ) {
				6261	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				6262	}
				6263	else {
				6264	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				6265	}
				6266
				6267	}
				6268
				6269	/*----------------------------------------------------------------------------
				6270	\| Returns the result of multiplying the quadruple-precision floating-point
				6271	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				6272	\| Standard for Binary Floating-Point Arithmetic.
				6273	----------------------------------------------------------------------------/
				6274
				6275	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				6276	{
				6277	flag aSign, bSign, zSign;
				6278	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6279	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6280	float128 z;
				6281
				6282	aSig1 = extractFloat128Frac1( a );
				6283	aSig0 = extractFloat128Frac0( a );
				6284	aExp = extractFloat128Exp( a );
				6285	aSign = extractFloat128Sign( a );
				6286	bSig1 = extractFloat128Frac1( b );
				6287	bSig0 = extractFloat128Frac0( b );
				6288	bExp = extractFloat128Exp( b );
				6289	bSign = extractFloat128Sign( b );
				6290	zSign = aSign ^ bSign;
				6291	if ( aExp == 0x7FFF ) {
				6292	if ( ( aSig0 \| aSig1 )
				6293	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				6294	return propagateFloat128NaN( a, b STATUS_VAR );
				6295	}
				6296	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				6297	return packFloat128( zSign, 0x7FFF, 0, 0 );
				6298	}
				6299	if ( bExp == 0x7FFF ) {
				6300	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6301	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				6302	invalid:
				6303	float_raise( float_flag_invalid STATUS_VAR);
				6304	z.low = float128_default_nan_low;
				6305	z.high = float128_default_nan_high;
				6306	return z;
				6307	}
				6308	return packFloat128( zSign, 0x7FFF, 0, 0 );
				6309	}
				6310	if ( aExp == 0 ) {
				6311	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				6312	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6313	}
				6314	if ( bExp == 0 ) {
				6315	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				6316	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				6317	}
				6318	zExp = aExp + bExp - 0x4000;
				6319	aSig0 \|= LIT64( 0x0001000000000000 );
				6320	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				6321	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				6322	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				6323	zSig2 \|= ( zSig3 != 0 );
				6324	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				6325	shift128ExtraRightJamming(
				6326	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				6327	++zExp;
				6328	}
				6329	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6330
				6331	}
				6332
				6333	/*----------------------------------------------------------------------------
				6334	\| Returns the result of dividing the quadruple-precision floating-point value
				6335	\| `a' by the corresponding value `b'. The operation is performed according to
				6336	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6337	----------------------------------------------------------------------------/
				6338
				6339	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				6340	{
				6341	flag aSign, bSign, zSign;
				6342	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6343	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				6344	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6345	float128 z;
				6346
				6347	aSig1 = extractFloat128Frac1( a );
				6348	aSig0 = extractFloat128Frac0( a );
				6349	aExp = extractFloat128Exp( a );
				6350	aSign = extractFloat128Sign( a );
				6351	bSig1 = extractFloat128Frac1( b );
				6352	bSig0 = extractFloat128Frac0( b );
				6353	bExp = extractFloat128Exp( b );
				6354	bSign = extractFloat128Sign( b );
				6355	zSign = aSign ^ bSign;
				6356	if ( aExp == 0x7FFF ) {
				6357	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6358	if ( bExp == 0x7FFF ) {
				6359	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6360	goto invalid;
				6361	}
				6362	return packFloat128( zSign, 0x7FFF, 0, 0 );
				6363	}
				6364	if ( bExp == 0x7FFF ) {
				6365	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6366	return packFloat128( zSign, 0, 0, 0 );
				6367	}
				6368	if ( bExp == 0 ) {
				6369	if ( ( bSig0 \| bSig1 ) == 0 ) {
				6370	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				6371	invalid:
				6372	float_raise( float_flag_invalid STATUS_VAR);
				6373	z.low = float128_default_nan_low;
				6374	z.high = float128_default_nan_high;
				6375	return z;
				6376	}
				6377	float_raise( float_flag_divbyzero STATUS_VAR);
				6378	return packFloat128( zSign, 0x7FFF, 0, 0 );
				6379	}
				6380	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				6381	}
				6382	if ( aExp == 0 ) {
				6383	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				6384	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6385	}
				6386	zExp = aExp - bExp + 0x3FFD;
				6387	shortShift128Left(
				6388	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				6389	shortShift128Left(
				6390	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				6391	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				6392	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				6393	++zExp;
				6394	}
				6395	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6396	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				6397	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6398	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6399	--zSig0;
				6400	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				6401	}
				6402	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				6403	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				6404	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				6405	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6406	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6407	--zSig1;
				6408	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				6409	}
				6410	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				6411	}
				6412	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				6413	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6414
				6415	}
				6416
				6417	/*----------------------------------------------------------------------------
				6418	\| Returns the remainder of the quadruple-precision floating-point value `a'
				6419	\| with respect to the corresponding value `b'. The operation is performed
				6420	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6421	----------------------------------------------------------------------------/
				6422
				6423	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				6424	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	6425	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6426	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6427	uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				6428	uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
				6429	int64_t sigMean0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6430	float128 z;
				6431
				6432	aSig1 = extractFloat128Frac1( a );
				6433	aSig0 = extractFloat128Frac0( a );
				6434	aExp = extractFloat128Exp( a );
				6435	aSign = extractFloat128Sign( a );
				6436	bSig1 = extractFloat128Frac1( b );
				6437	bSig0 = extractFloat128Frac0( b );
				6438	bExp = extractFloat128Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6439	if ( aExp == 0x7FFF ) {
				6440	if ( ( aSig0 \| aSig1 )
				6441	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				6442	return propagateFloat128NaN( a, b STATUS_VAR );
				6443	}
				6444	goto invalid;
				6445	}
				6446	if ( bExp == 0x7FFF ) {
				6447	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6448	return a;
				6449	}
				6450	if ( bExp == 0 ) {
				6451	if ( ( bSig0 \| bSig1 ) == 0 ) {
				6452	invalid:
				6453	float_raise( float_flag_invalid STATUS_VAR);
				6454	z.low = float128_default_nan_low;
				6455	z.high = float128_default_nan_high;
				6456	return z;
				6457	}
				6458	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				6459	}
				6460	if ( aExp == 0 ) {
				6461	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				6462	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6463	}
				6464	expDiff = aExp - bExp;
				6465	if ( expDiff < -1 ) return a;
				6466	shortShift128Left(
				6467	aSig0 \| LIT64( 0x0001000000000000 ),
				6468	aSig1,
				6469	15 - ( expDiff < 0 ),
				6470	&aSig0,
				6471	&aSig1
				6472	);
				6473	shortShift128Left(
				6474	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				6475	q = le128( bSig0, bSig1, aSig0, aSig1 );
				6476	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				6477	expDiff -= 64;
				6478	while ( 0 < expDiff ) {
				6479	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6480	q = ( 4 < q ) ? q - 4 : 0;
				6481	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6482	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				6483	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				6484	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				6485	expDiff -= 61;
				6486	}
				6487	if ( -64 < expDiff ) {
				6488	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6489	q = ( 4 < q ) ? q - 4 : 0;
				6490	q >>= - expDiff;
				6491	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6492	expDiff += 52;
				6493	if ( expDiff < 0 ) {
				6494	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				6495	}
				6496	else {
				6497	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				6498	}
				6499	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6500	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				6501	}
				6502	else {
				6503	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				6504	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6505	}
				6506	do {
				6507	alternateASig0 = aSig0;
				6508	alternateASig1 = aSig1;
				6509	++q;
				6510	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6511	} while ( 0 <= (int64_t) aSig0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6512	add128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6513	aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6514	if ( ( sigMean0 < 0 )
				6515	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				6516	aSig0 = alternateASig0;
				6517	aSig1 = alternateASig1;
				6518	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6519	zSign = ( (int64_t) aSig0 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6520	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				6521	return
				6522	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				6523
				6524	}
				6525
				6526	/*----------------------------------------------------------------------------
				6527	\| Returns the square root of the quadruple-precision floating-point value `a'.
				6528	\| The operation is performed according to the IEC/IEEE Standard for Binary
				6529	\| Floating-Point Arithmetic.
				6530	----------------------------------------------------------------------------/
				6531
				6532	float128 float128_sqrt( float128 a STATUS_PARAM )
				6533	{
				6534	flag aSign;
				6535	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6536	uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				6537	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6538	float128 z;
				6539
				6540	aSig1 = extractFloat128Frac1( a );
				6541	aSig0 = extractFloat128Frac0( a );
				6542	aExp = extractFloat128Exp( a );
				6543	aSign = extractFloat128Sign( a );
				6544	if ( aExp == 0x7FFF ) {
				6545	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				6546	if ( ! aSign ) return a;
				6547	goto invalid;
				6548	}
				6549	if ( aSign ) {
				6550	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				6551	invalid:
				6552	float_raise( float_flag_invalid STATUS_VAR);
				6553	z.low = float128_default_nan_low;
				6554	z.high = float128_default_nan_high;
				6555	return z;
				6556	}
				6557	if ( aExp == 0 ) {
				6558	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				6559	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6560	}
				6561	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				6562	aSig0 \|= LIT64( 0x0001000000000000 );
				6563	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				6564	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				6565	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				6566	doubleZSig0 = zSig0<<1;
				6567	mul64To128( zSig0, zSig0, &term0, &term1 );
				6568	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6569	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6570	--zSig0;
				6571	doubleZSig0 -= 2;
				6572	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				6573	}
				6574	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				6575	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				6576	if ( zSig1 == 0 ) zSig1 = 1;
				6577	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				6578	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				6579	mul64To128( zSig1, zSig1, &term2, &term3 );
				6580	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6581	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6582	--zSig1;
				6583	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				6584	term3 \|= 1;
				6585	term2 \|= doubleZSig0;
				6586	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				6587	}
				6588	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				6589	}
				6590	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				6591	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6592
				6593	}
				6594
				6595	/*----------------------------------------------------------------------------
				6596	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6597	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6598	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6599	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6600	----------------------------------------------------------------------------/
				6601
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6602	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6603	{
				6604
				6605	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6606	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6607	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6608	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6609	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6610	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6611	return 0;
				6612	}
				6613	return
				6614	( a.low == b.low )
				6615	&& ( ( a.high == b.high )
				6616	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6617	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6618	);
				6619
				6620	}
				6621
				6622	/*----------------------------------------------------------------------------
				6623	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6624	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				6625	\| exception is raised if either operand is a NaN. The comparison is performed
				6626	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6627	----------------------------------------------------------------------------/
				6628
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6629	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6630	{
				6631	flag aSign, bSign;
				6632
				6633	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6634	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6635	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6636	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6637	) {
				6638	float_raise( float_flag_invalid STATUS_VAR);
				6639	return 0;
				6640	}
				6641	aSign = extractFloat128Sign( a );
				6642	bSign = extractFloat128Sign( b );
				6643	if ( aSign != bSign ) {
				6644	return
				6645	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6646	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6647	== 0 );
				6648	}
				6649	return
				6650	aSign ? le128( b.high, b.low, a.high, a.low )
				6651	: le128( a.high, a.low, b.high, b.low );
				6652
				6653	}
				6654
				6655	/*----------------------------------------------------------------------------
				6656	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6657	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6658	\| raised if either operand is a NaN. The comparison is performed according
				6659	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6660	----------------------------------------------------------------------------/
				6661
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6662	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6663	{
				6664	flag aSign, bSign;
				6665
				6666	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6667	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6668	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6669	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6670	) {
				6671	float_raise( float_flag_invalid STATUS_VAR);
				6672	return 0;
				6673	}
				6674	aSign = extractFloat128Sign( a );
				6675	bSign = extractFloat128Sign( b );
				6676	if ( aSign != bSign ) {
				6677	return
				6678	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6679	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6680	!= 0 );
				6681	}
				6682	return
				6683	aSign ? lt128( b.high, b.low, a.high, a.low )
				6684	: lt128( a.high, a.low, b.high, b.low );
				6685
				6686	}
				6687
				6688	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6689	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6690	\| be compared, and 0 otherwise. The invalid exception is raised if either
				6691	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				6692	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6693	----------------------------------------------------------------------------/
				6694
				6695	int float128_unordered( float128 a, float128 b STATUS_PARAM )
				6696	{
				6697	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6698	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6699	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6700	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6701	) {
				6702	float_raise( float_flag_invalid STATUS_VAR);
				6703	return 1;
				6704	}
				6705	return 0;
				6706	}
				6707
				6708	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6709	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6710	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6711	\| exception. The comparison is performed according to the IEC/IEEE Standard
				6712	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6713	----------------------------------------------------------------------------/
				6714
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6715	int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6716	{
				6717
				6718	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6719	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6720	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6721	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6722	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6723	if ( float128_is_signaling_nan( a )
				6724	\|\| float128_is_signaling_nan( b ) ) {
				6725	float_raise( float_flag_invalid STATUS_VAR);
				6726	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6727	return 0;
				6728	}
				6729	return
				6730	( a.low == b.low )
				6731	&& ( ( a.high == b.high )
				6732	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6733	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6734	);
				6735
				6736	}
				6737
				6738	/*----------------------------------------------------------------------------
				6739	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6740	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				6741	\| cause an exception. Otherwise, the comparison is performed according to the
				6742	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6743	----------------------------------------------------------------------------/
				6744
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6745	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6746	{
				6747	flag aSign, bSign;
				6748
				6749	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6750	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6751	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6752	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6753	) {
				6754	if ( float128_is_signaling_nan( a )
				6755	\|\| float128_is_signaling_nan( b ) ) {
				6756	float_raise( float_flag_invalid STATUS_VAR);
				6757	}
				6758	return 0;
				6759	}
				6760	aSign = extractFloat128Sign( a );
				6761	bSign = extractFloat128Sign( b );
				6762	if ( aSign != bSign ) {
				6763	return
				6764	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6765	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6766	== 0 );
				6767	}
				6768	return
				6769	aSign ? le128( b.high, b.low, a.high, a.low )
				6770	: le128( a.high, a.low, b.high, b.low );
				6771
				6772	}
				6773
				6774	/*----------------------------------------------------------------------------
				6775	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6776	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6777	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				6778	\| Standard for Binary Floating-Point Arithmetic.
				6779	----------------------------------------------------------------------------/
				6780
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6781	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6782	{
				6783	flag aSign, bSign;
				6784
				6785	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6786	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6787	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6788	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6789	) {
				6790	if ( float128_is_signaling_nan( a )
				6791	\|\| float128_is_signaling_nan( b ) ) {
				6792	float_raise( float_flag_invalid STATUS_VAR);
				6793	}
				6794	return 0;
				6795	}
				6796	aSign = extractFloat128Sign( a );
				6797	bSign = extractFloat128Sign( b );
				6798	if ( aSign != bSign ) {
				6799	return
				6800	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6801	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6802	!= 0 );
				6803	}
				6804	return
				6805	aSign ? lt128( b.high, b.low, a.high, a.low )
				6806	: lt128( a.high, a.low, b.high, b.low );
				6807
				6808	}
				6809
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6810	/*----------------------------------------------------------------------------
				6811	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
				6812	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				6813	\| comparison is performed according to the IEC/IEEE Standard for Binary
				6814	\| Floating-Point Arithmetic.
				6815	----------------------------------------------------------------------------/
				6816
				6817	int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
				6818	{
				6819	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6820	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6821	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6822	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6823	) {
				6824	if ( float128_is_signaling_nan( a )
				6825	\|\| float128_is_signaling_nan( b ) ) {
				6826	float_raise( float_flag_invalid STATUS_VAR);
				6827	}
				6828	return 1;
				6829	}
				6830	return 0;
				6831	}
				6832
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6833	/* misc functions */
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	6834	float32 uint32_to_float32(uint32_t a STATUS_PARAM)
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6835	{
				6836	return int64_to_float32(a STATUS_VAR);
				6837	}
				6838
Peter Maydell	c4850f9	2014-01-07 17:17:49 +0000	[diff] [blame]	6839	float64 uint32_to_float64(uint32_t a STATUS_PARAM)
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6840	{
				6841	return int64_to_float64(a STATUS_VAR);
				6842	}
				6843
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6844	uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6845	{
				6846	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6847	uint32 res;
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6848	int old_exc_flags = get_float_exception_flags(status);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6849
				6850	v = float32_to_int64(a STATUS_VAR);
				6851	if (v < 0) {
				6852	res = 0;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6853	} else if (v > 0xffffffff) {
				6854	res = 0xffffffff;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6855	} else {
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6856	return v;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6857	}
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6858	set_float_exception_flags(old_exc_flags, status);
				6859	float_raise(float_flag_invalid STATUS_VAR);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6860	return res;
				6861	}
				6862
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6863	uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6864	{
				6865	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6866	uint32 res;
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6867	int old_exc_flags = get_float_exception_flags(status);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6868
				6869	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6870	if (v < 0) {
				6871	res = 0;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6872	} else if (v > 0xffffffff) {
				6873	res = 0xffffffff;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6874	} else {
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6875	return v;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6876	}
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6877	set_float_exception_flags(old_exc_flags, status);
				6878	float_raise(float_flag_invalid STATUS_VAR);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6879	return res;
				6880	}
				6881
Will Newton	f581bf5	2014-01-07 17:17:48 +0000	[diff] [blame]	6882	int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
				6883	{
				6884	int32_t v;
				6885	int_fast16_t res;
				6886	int old_exc_flags = get_float_exception_flags(status);
				6887
				6888	v = float32_to_int32(a STATUS_VAR);
				6889	if (v < -0x8000) {
				6890	res = -0x8000;
				6891	} else if (v > 0x7fff) {
				6892	res = 0x7fff;
				6893	} else {
				6894	return v;
				6895	}
				6896
				6897	set_float_exception_flags(old_exc_flags, status);
				6898	float_raise(float_flag_invalid STATUS_VAR);
				6899	return res;
				6900	}
				6901
				6902	uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
				6903	{
				6904	int32_t v;
				6905	uint_fast16_t res;
				6906	int old_exc_flags = get_float_exception_flags(status);
				6907
				6908	v = float32_to_int32(a STATUS_VAR);
				6909	if (v < 0) {
				6910	res = 0;
				6911	} else if (v > 0xffff) {
				6912	res = 0xffff;
				6913	} else {
				6914	return v;
				6915	}
				6916
				6917	set_float_exception_flags(old_exc_flags, status);
				6918	float_raise(float_flag_invalid STATUS_VAR);
				6919	return res;
				6920	}
				6921
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6922	uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6923	{
				6924	int64_t v;
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	6925	uint_fast16_t res;
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6926	int old_exc_flags = get_float_exception_flags(status);
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6927
				6928	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6929	if (v < 0) {
				6930	res = 0;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6931	} else if (v > 0xffff) {
				6932	res = 0xffff;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6933	} else {
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6934	return v;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6935	}
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	6936	set_float_exception_flags(old_exc_flags, status);
				6937	float_raise(float_flag_invalid STATUS_VAR);
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6938	return res;
				6939	}
				6940
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6941	uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6942	{
Tom Musta	5e7f654	2014-01-07 17:17:51 +0000	[diff] [blame]	6943	uint64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6944	uint32 res;
Tom Musta	5e7f654	2014-01-07 17:17:51 +0000	[diff] [blame]	6945	int old_exc_flags = get_float_exception_flags(status);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6946
Tom Musta	5e7f654	2014-01-07 17:17:51 +0000	[diff] [blame]	6947	v = float64_to_uint64(a STATUS_VAR);
				6948	if (v > 0xffffffff) {
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6949	res = 0xffffffff;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6950	} else {
Tom Musta	5e7f654	2014-01-07 17:17:51 +0000	[diff] [blame]	6951	return v;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6952	}
Tom Musta	5e7f654	2014-01-07 17:17:51 +0000	[diff] [blame]	6953	set_float_exception_flags(old_exc_flags, status);
				6954	float_raise(float_flag_invalid STATUS_VAR);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6955	return res;
				6956	}
				6957
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6958	uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6959	{
Tom Musta	fd728f2	2014-01-07 17:17:51 +0000	[diff] [blame]	6960	uint64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6961	uint32 res;
Tom Musta	fd728f2	2014-01-07 17:17:51 +0000	[diff] [blame]	6962	int old_exc_flags = get_float_exception_flags(status);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6963
Tom Musta	fd728f2	2014-01-07 17:17:51 +0000	[diff] [blame]	6964	v = float64_to_uint64_round_to_zero(a STATUS_VAR);
				6965	if (v > 0xffffffff) {
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6966	res = 0xffffffff;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6967	} else {
Tom Musta	fd728f2	2014-01-07 17:17:51 +0000	[diff] [blame]	6968	return v;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6969	}
Tom Musta	fd728f2	2014-01-07 17:17:51 +0000	[diff] [blame]	6970	set_float_exception_flags(old_exc_flags, status);
				6971	float_raise(float_flag_invalid STATUS_VAR);
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6972	return res;
				6973	}
				6974
Will Newton	f581bf5	2014-01-07 17:17:48 +0000	[diff] [blame]	6975	int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
				6976	{
				6977	int64_t v;
				6978	int_fast16_t res;
				6979	int old_exc_flags = get_float_exception_flags(status);
				6980
				6981	v = float64_to_int32(a STATUS_VAR);
				6982	if (v < -0x8000) {
				6983	res = -0x8000;
				6984	} else if (v > 0x7fff) {
				6985	res = 0x7fff;
				6986	} else {
				6987	return v;
				6988	}
				6989
				6990	set_float_exception_flags(old_exc_flags, status);
				6991	float_raise(float_flag_invalid STATUS_VAR);
				6992	return res;
				6993	}
				6994
				6995	uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
				6996	{
				6997	int64_t v;
				6998	uint_fast16_t res;
				6999	int old_exc_flags = get_float_exception_flags(status);
				7000
				7001	v = float64_to_int32(a STATUS_VAR);
				7002	if (v < 0) {
				7003	res = 0;
				7004	} else if (v > 0xffff) {
				7005	res = 0xffff;
				7006	} else {
				7007	return v;
				7008	}
				7009
				7010	set_float_exception_flags(old_exc_flags, status);
				7011	float_raise(float_flag_invalid STATUS_VAR);
				7012	return res;
				7013	}
				7014
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	7015	uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7016	{
				7017	int64_t v;
Andreas Färber	5aea4c5	2012-04-26 00:15:55 +0200	[diff] [blame]	7018	uint_fast16_t res;
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	7019	int old_exc_flags = get_float_exception_flags(status);
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7020
				7021	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				7022	if (v < 0) {
				7023	res = 0;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7024	} else if (v > 0xffff) {
				7025	res = 0xffff;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7026	} else {
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	7027	return v;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7028	}
Peter Maydell	34e1c27	2014-01-07 17:17:49 +0000	[diff] [blame]	7029	set_float_exception_flags(old_exc_flags, status);
				7030	float_raise(float_flag_invalid STATUS_VAR);
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	7031	return res;
				7032	}
				7033
Tom Musta	fb3ea83	2014-01-07 17:17:49 +0000	[diff] [blame]	7034	/*----------------------------------------------------------------------------
				7035	\| Returns the result of converting the double-precision floating-point value
				7036	\| `a' to the 64-bit unsigned integer format. The conversion is
				7037	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				7038	\| Arithmetic---which means in particular that the conversion is rounded
				7039	\| according to the current rounding mode. If `a' is a NaN, the largest
				7040	\| positive integer is returned. If the conversion overflows, the
				7041	\| largest unsigned integer is returned. If 'a' is negative, the value is
				7042	\| rounded and zero is returned; negative values that do not round to zero
				7043	\| will raise the inexact exception.
				7044	----------------------------------------------------------------------------/
				7045
				7046	uint64_t float64_to_uint64(float64 a STATUS_PARAM)
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	7047	{
Tom Musta	fb3ea83	2014-01-07 17:17:49 +0000	[diff] [blame]	7048	flag aSign;
				7049	int_fast16_t aExp, shiftCount;
				7050	uint64_t aSig, aSigExtra;
				7051	a = float64_squash_input_denormal(a STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	7052
Tom Musta	fb3ea83	2014-01-07 17:17:49 +0000	[diff] [blame]	7053	aSig = extractFloat64Frac(a);
				7054	aExp = extractFloat64Exp(a);
				7055	aSign = extractFloat64Sign(a);
				7056	if (aSign && (aExp > 1022)) {
				7057	float_raise(float_flag_invalid STATUS_VAR);
				7058	if (float64_is_any_nan(a)) {
				7059	return LIT64(0xFFFFFFFFFFFFFFFF);
				7060	} else {
				7061	return 0;
				7062	}
				7063	}
				7064	if (aExp) {
				7065	aSig \|= LIT64(0x0010000000000000);
				7066	}
				7067	shiftCount = 0x433 - aExp;
				7068	if (shiftCount <= 0) {
				7069	if (0x43E < aExp) {
				7070	float_raise(float_flag_invalid STATUS_VAR);
				7071	return LIT64(0xFFFFFFFFFFFFFFFF);
				7072	}
				7073	aSigExtra = 0;
				7074	aSig <<= -shiftCount;
				7075	} else {
				7076	shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
				7077	}
				7078	return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	7079	}
				7080
				7081	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				7082	{
Tom Musta	0a87a31	2014-01-07 17:17:50 +0000	[diff] [blame]	7083	signed char current_rounding_mode = STATUS(float_rounding_mode);
				7084	set_float_rounding_mode(float_round_to_zero STATUS_VAR);
				7085	int64_t v = float64_to_uint64(a STATUS_VAR);
				7086	set_float_rounding_mode(current_rounding_mode STATUS_VAR);
				7087	return v;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	7088	}
				7089
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7090	#define COMPARE(s, nan_exp) \
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	7091	static inline int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7092	int is_quiet STATUS_PARAM ) \
				7093	{ \
				7094	flag aSign, bSign; \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7095	uint ## s ## _t av, bv; \
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	7096	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				7097	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7098	\
				7099	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				7100	extractFloat ## s ## Frac( a ) ) \|\| \
				7101	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				7102	extractFloat ## s ## Frac( b ) )) { \
				7103	if (!is_quiet \|\| \
				7104	float ## s ## _is_signaling_nan( a ) \|\| \
				7105	float ## s ## _is_signaling_nan( b ) ) { \
				7106	float_raise( float_flag_invalid STATUS_VAR); \
				7107	} \
				7108	return float_relation_unordered; \
				7109	} \
				7110	aSign = extractFloat ## s ## Sign( a ); \
				7111	bSign = extractFloat ## s ## Sign( b ); \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	7112	av = float ## s ## _val(a); \
blueswir1	cd8a253	2007-11-21 18:57:44 +0000	[diff] [blame]	7113	bv = float ## s ## _val(b); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7114	if ( aSign != bSign ) { \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7115	if ( (uint ## s ## _t) ( ( av \| bv )<<1 ) == 0 ) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7116	/* zero case */ \
				7117	return float_relation_equal; \
				7118	} else { \
				7119	return 1 - (2 * aSign); \
				7120	} \
				7121	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	7122	if (av == bv) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7123	return float_relation_equal; \
				7124	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	7125	return 1 - 2 * (aSign ^ ( av < bv )); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7126	} \
				7127	} \
				7128	} \
				7129	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	7130	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7131	{ \
				7132	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				7133	} \
				7134	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	7135	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	7136	{ \
				7137	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				7138	}
				7139
				7140	COMPARE(32, 0xff)
				7141	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7142
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	7143	static inline int floatx80_compare_internal( floatx80 a, floatx80 b,
Aurelien Jarno	f6714d3	2011-04-20 13:04:22 +0200	[diff] [blame]	7144	int is_quiet STATUS_PARAM )
				7145	{
				7146	flag aSign, bSign;
				7147
				7148	if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
				7149	( extractFloatx80Frac( a )<<1 ) ) \|\|
				7150	( ( extractFloatx80Exp( b ) == 0x7fff ) &&
				7151	( extractFloatx80Frac( b )<<1 ) )) {
				7152	if (!is_quiet \|\|
				7153	floatx80_is_signaling_nan( a ) \|\|
				7154	floatx80_is_signaling_nan( b ) ) {
				7155	float_raise( float_flag_invalid STATUS_VAR);
				7156	}
				7157	return float_relation_unordered;
				7158	}
				7159	aSign = extractFloatx80Sign( a );
				7160	bSign = extractFloatx80Sign( b );
				7161	if ( aSign != bSign ) {
				7162
				7163	if ( ( ( (uint16_t) ( ( a.high \| b.high ) << 1 ) ) == 0) &&
				7164	( ( a.low \| b.low ) == 0 ) ) {
				7165	/* zero case */
				7166	return float_relation_equal;
				7167	} else {
				7168	return 1 - (2 * aSign);
				7169	}
				7170	} else {
				7171	if (a.low == b.low && a.high == b.high) {
				7172	return float_relation_equal;
				7173	} else {
				7174	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				7175	}
				7176	}
				7177	}
				7178
				7179	int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
				7180	{
				7181	return floatx80_compare_internal(a, b, 0 STATUS_VAR);
				7182	}
				7183
				7184	int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				7185	{
				7186	return floatx80_compare_internal(a, b, 1 STATUS_VAR);
				7187	}
				7188
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	7189	static inline int float128_compare_internal( float128 a, float128 b,
blueswir1	1f58732	2007-11-25 18:40:20 +0000	[diff] [blame]	7190	int is_quiet STATUS_PARAM )
				7191	{
				7192	flag aSign, bSign;
				7193
				7194	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
				7195	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
				7196	( ( extractFloat128Exp( b ) == 0x7fff ) &&
				7197	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
				7198	if (!is_quiet \|\|
				7199	float128_is_signaling_nan( a ) \|\|
				7200	float128_is_signaling_nan( b ) ) {
				7201	float_raise( float_flag_invalid STATUS_VAR);
				7202	}
				7203	return float_relation_unordered;
				7204	}
				7205	aSign = extractFloat128Sign( a );
				7206	bSign = extractFloat128Sign( b );
				7207	if ( aSign != bSign ) {
				7208	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
				7209	/* zero case */
				7210	return float_relation_equal;
				7211	} else {
				7212	return 1 - (2 * aSign);
				7213	}
				7214	} else {
				7215	if (a.low == b.low && a.high == b.high) {
				7216	return float_relation_equal;
				7217	} else {
				7218	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				7219	}
				7220	}
				7221	}
				7222
				7223	int float128_compare( float128 a, float128 b STATUS_PARAM )
				7224	{
				7225	return float128_compare_internal(a, b, 0 STATUS_VAR);
				7226	}
				7227
				7228	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
				7229	{
				7230	return float128_compare_internal(a, b, 1 STATUS_VAR);
				7231	}
				7232
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7233	/* min() and max() functions. These can't be implemented as
				7234	* 'compare and pick one input' because that would mishandle
				7235	* NaNs and +0 vs -0.
Will Newton	e17ab31	2013-12-06 17:01:41 +0000	[diff] [blame]	7236	*
				7237	* minnum() and maxnum() functions. These are similar to the min()
				7238	* and max() functions but if one of the arguments is a QNaN and
				7239	* the other is numerical then the numerical argument is returned.
				7240	* minnum() and maxnum correspond to the IEEE 754-2008 minNum()
				7241	* and maxNum() operations. min() and max() are the typical min/max
				7242	* semantics provided by many CPUs which predate that specification.
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7243	*/
Will Newton	e70614e	2013-12-06 17:01:41 +0000	[diff] [blame]	7244	#define MINMAX(s) \
Luiz Capitulino	a49db98	2014-06-19 10:13:43 -0400	[diff] [blame]	7245	static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
Will Newton	e17ab31	2013-12-06 17:01:41 +0000	[diff] [blame]	7246	int ismin, int isieee STATUS_PARAM) \
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7247	{ \
				7248	flag aSign, bSign; \
				7249	uint ## s ## _t av, bv; \
				7250	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				7251	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
				7252	if (float ## s ## _is_any_nan(a) \|\| \
				7253	float ## s ## _is_any_nan(b)) { \
Will Newton	e17ab31	2013-12-06 17:01:41 +0000	[diff] [blame]	7254	if (isieee) { \
				7255	if (float ## s ## _is_quiet_nan(a) && \
				7256	!float ## s ##_is_any_nan(b)) { \
				7257	return b; \
				7258	} else if (float ## s ## _is_quiet_nan(b) && \
				7259	!float ## s ## _is_any_nan(a)) { \
				7260	return a; \
				7261	} \
				7262	} \
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7263	return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
				7264	} \
				7265	aSign = extractFloat ## s ## Sign(a); \
				7266	bSign = extractFloat ## s ## Sign(b); \
				7267	av = float ## s ## _val(a); \
				7268	bv = float ## s ## _val(b); \
				7269	if (aSign != bSign) { \
				7270	if (ismin) { \
				7271	return aSign ? a : b; \
				7272	} else { \
				7273	return aSign ? b : a; \
				7274	} \
				7275	} else { \
				7276	if (ismin) { \
				7277	return (aSign ^ (av < bv)) ? a : b; \
				7278	} else { \
				7279	return (aSign ^ (av < bv)) ? b : a; \
				7280	} \
				7281	} \
				7282	} \
				7283	\
				7284	float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
				7285	{ \
Will Newton	e17ab31	2013-12-06 17:01:41 +0000	[diff] [blame]	7286	return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR); \
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7287	} \
				7288	\
				7289	float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
				7290	{ \
Will Newton	e17ab31	2013-12-06 17:01:41 +0000	[diff] [blame]	7291	return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR); \
				7292	} \
				7293	\
				7294	float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
				7295	{ \
				7296	return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR); \
				7297	} \
				7298	\
				7299	float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
				7300	{ \
				7301	return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR); \
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7302	}
				7303
Will Newton	e70614e	2013-12-06 17:01:41 +0000	[diff] [blame]	7304	MINMAX(32)
				7305	MINMAX(64)
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	7306
				7307
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7308	/* Multiply A by 2 raised to the power N. */
				7309	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				7310	{
				7311	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7312	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7313	uint32_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7314
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	7315	a = float32_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7316	aSig = extractFloat32Frac( a );
				7317	aExp = extractFloat32Exp( a );
				7318	aSign = extractFloat32Sign( a );
				7319
				7320	if ( aExp == 0xFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7321	if ( aSig ) {
				7322	return propagateFloat32NaN( a, a STATUS_VAR );
				7323	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7324	return a;
				7325	}
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7326	if (aExp != 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7327	aSig \|= 0x00800000;
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7328	} else if (aSig == 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7329	return a;
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7330	} else {
				7331	aExp++;
				7332	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7333
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7334	if (n > 0x200) {
				7335	n = 0x200;
				7336	} else if (n < -0x200) {
				7337	n = -0x200;
				7338	}
				7339
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7340	aExp += n - 1;
				7341	aSig <<= 7;
				7342	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7343	}
				7344
				7345	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				7346	{
				7347	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7348	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7349	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7350
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	7351	a = float64_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7352	aSig = extractFloat64Frac( a );
				7353	aExp = extractFloat64Exp( a );
				7354	aSign = extractFloat64Sign( a );
				7355
				7356	if ( aExp == 0x7FF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7357	if ( aSig ) {
				7358	return propagateFloat64NaN( a, a STATUS_VAR );
				7359	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7360	return a;
				7361	}
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7362	if (aExp != 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7363	aSig \|= LIT64( 0x0010000000000000 );
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7364	} else if (aSig == 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7365	return a;
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7366	} else {
				7367	aExp++;
				7368	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7369
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7370	if (n > 0x1000) {
				7371	n = 0x1000;
				7372	} else if (n < -0x1000) {
				7373	n = -0x1000;
				7374	}
				7375
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7376	aExp += n - 1;
				7377	aSig <<= 10;
				7378	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7379	}
				7380
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7381	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				7382	{
				7383	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7384	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7385	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7386
				7387	aSig = extractFloatx80Frac( a );
				7388	aExp = extractFloatx80Exp( a );
				7389	aSign = extractFloatx80Sign( a );
				7390
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7391	if ( aExp == 0x7FFF ) {
				7392	if ( aSig<<1 ) {
				7393	return propagateFloatx80NaN( a, a STATUS_VAR );
				7394	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7395	return a;
				7396	}
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7397
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7398	if (aExp == 0) {
				7399	if (aSig == 0) {
				7400	return a;
				7401	}
				7402	aExp++;
				7403	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7404
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7405	if (n > 0x10000) {
				7406	n = 0x10000;
				7407	} else if (n < -0x10000) {
				7408	n = -0x10000;
				7409	}
				7410
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7411	aExp += n;
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7412	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				7413	aSign, aExp, aSig, 0 STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7414	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7415
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7416	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				7417	{
				7418	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7419	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	7420	uint64_t aSig0, aSig1;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7421
				7422	aSig1 = extractFloat128Frac1( a );
				7423	aSig0 = extractFloat128Frac0( a );
				7424	aExp = extractFloat128Exp( a );
				7425	aSign = extractFloat128Sign( a );
				7426	if ( aExp == 0x7FFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7427	if ( aSig0 \| aSig1 ) {
				7428	return propagateFloat128NaN( a, a STATUS_VAR );
				7429	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7430	return a;
				7431	}
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7432	if (aExp != 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7433	aSig0 \|= LIT64( 0x0001000000000000 );
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7434	} else if (aSig0 == 0 && aSig1 == 0) {
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7435	return a;
Peter Maydell	3c85c37	2014-01-07 17:17:50 +0000	[diff] [blame]	7436	} else {
				7437	aExp++;
				7438	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7439
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	7440	if (n > 0x10000) {
				7441	n = 0x10000;
				7442	} else if (n < -0x10000) {
				7443	n = -0x10000;
				7444	}
				7445
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	7446	aExp += n - 1;
				7447	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
				7448	STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	7449
				7450	}