Blame - fpu/softfloat.c - qemu

blob: 81a7d1ae09ecb2545867d2b4dcab92675ec5ce4d [file] [log] [blame]

Andreas Färber	8d725fa	2011-03-07 01:34:04 +0100	[diff] [blame]	1	/*
				2	* QEMU float support
				3	*
				4	* Derived from SoftFloat.
				5	*/
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6
				7	/*============================================================================
				8
				9	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				10	Package, Release 2b.
				11
				12	Written by John R. Hauser. This work was made possible in part by the
				13	International Computer Science Institute, located at Suite 600, 1947 Center
				14	Street, Berkeley, California 94704. Funding was partially provided by the
				15	National Science Foundation under grant MIP-9311980. The original version
				16	of this code was written as part of a project to build a fixed-point vector
				17	processor in collaboration with the University of California at Berkeley,
				18	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				19	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				20	arithmetic/SoftFloat.html'.
				21
				22	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				23	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				24	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				25	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				26	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				27	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				28	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				29	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				30
				31	Derivative works are acceptable, even for commercial purposes, so long as
				32	(1) the source code for the derivative work includes prominent notice that
				33	the work is derivative, and (2) the source code includes prominent notice with
				34	these four paragraphs for those parts of this code that are retained.
				35
				36	=============================================================================*/
				37
Peter Maydell	2ac8bd0	2011-09-26 16:56:55 +0100	[diff] [blame]	38	/* softfloat (and in particular the code in softfloat-specialize.h) is
				39	* target-dependent and needs the TARGET_* macros.
				40	*/
				41	#include "config.h"
				42
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	43	#include "softfloat.h"
				44
				45	/*----------------------------------------------------------------------------
				46	\| Primitive arithmetic functions, including multi-word arithmetic, and
				47	\| division and square root approximations. (Can be specialized to target if
				48	\| desired.)
				49	----------------------------------------------------------------------------/
				50	#include "softfloat-macros.h"
				51
				52	/*----------------------------------------------------------------------------
				53	\| Functions and definitions to determine: (1) whether tininess for underflow
				54	\| is detected before or after rounding by default, (2) what (if anything)
				55	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				56	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				57	\| are propagated from function inputs to output. These details are target-
				58	\| specific.
				59	----------------------------------------------------------------------------/
				60	#include "softfloat-specialize.h"
				61
				62	void set_float_rounding_mode(int val STATUS_PARAM)
				63	{
				64	STATUS(float_rounding_mode) = val;
				65	}
				66
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	67	void set_float_exception_flags(int val STATUS_PARAM)
				68	{
				69	STATUS(float_exception_flags) = val;
				70	}
				71
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	72	void set_floatx80_rounding_precision(int val STATUS_PARAM)
				73	{
				74	STATUS(floatx80_rounding_precision) = val;
				75	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	76
				77	/*----------------------------------------------------------------------------
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	78	\| Returns the fraction bits of the half-precision floating-point value `a'.
				79	----------------------------------------------------------------------------/
				80
				81	INLINE uint32_t extractFloat16Frac(float16 a)
				82	{
				83	return float16_val(a) & 0x3ff;
				84	}
				85
				86	/*----------------------------------------------------------------------------
				87	\| Returns the exponent bits of the half-precision floating-point value `a'.
				88	----------------------------------------------------------------------------/
				89
				90	INLINE int16 extractFloat16Exp(float16 a)
				91	{
				92	return (float16_val(a) >> 10) & 0x1f;
				93	}
				94
				95	/*----------------------------------------------------------------------------
				96	\| Returns the sign bit of the single-precision floating-point value `a'.
				97	----------------------------------------------------------------------------/
				98
				99	INLINE flag extractFloat16Sign(float16 a)
				100	{
				101	return float16_val(a)>>15;
				102	}
				103
				104	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	105	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				106	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				107	\| input. If `zSign' is 1, the input is negated before being converted to an
				108	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				109	\| is simply rounded to an integer, with the inexact exception raised if the
				110	\| input cannot be represented exactly as an integer. However, if the fixed-
				111	\| point input is too large, the invalid exception is raised and the largest
				112	\| positive or negative integer is returned.
				113	----------------------------------------------------------------------------/
				114
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	115	static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	116	{
				117	int8 roundingMode;
				118	flag roundNearestEven;
				119	int8 roundIncrement, roundBits;
				120	int32 z;
				121
				122	roundingMode = STATUS(float_rounding_mode);
				123	roundNearestEven = ( roundingMode == float_round_nearest_even );
				124	roundIncrement = 0x40;
				125	if ( ! roundNearestEven ) {
				126	if ( roundingMode == float_round_to_zero ) {
				127	roundIncrement = 0;
				128	}
				129	else {
				130	roundIncrement = 0x7F;
				131	if ( zSign ) {
				132	if ( roundingMode == float_round_up ) roundIncrement = 0;
				133	}
				134	else {
				135	if ( roundingMode == float_round_down ) roundIncrement = 0;
				136	}
				137	}
				138	}
				139	roundBits = absZ & 0x7F;
				140	absZ = ( absZ + roundIncrement )>>7;
				141	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				142	z = absZ;
				143	if ( zSign ) z = - z;
				144	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				145	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	146	return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	147	}
				148	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				149	return z;
				150
				151	}
				152
				153	/*----------------------------------------------------------------------------
				154	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				155	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				156	\| and returns the properly rounded 64-bit integer corresponding to the input.
				157	\| If `zSign' is 1, the input is negated before being converted to an integer.
				158	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				159	\| the inexact exception raised if the input cannot be represented exactly as
				160	\| an integer. However, if the fixed-point input is too large, the invalid
				161	\| exception is raised and the largest positive or negative integer is
				162	\| returned.
				163	----------------------------------------------------------------------------/
				164
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	165	static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	166	{
				167	int8 roundingMode;
				168	flag roundNearestEven, increment;
				169	int64 z;
				170
				171	roundingMode = STATUS(float_rounding_mode);
				172	roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	173	increment = ( (int64_t) absZ1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	174	if ( ! roundNearestEven ) {
				175	if ( roundingMode == float_round_to_zero ) {
				176	increment = 0;
				177	}
				178	else {
				179	if ( zSign ) {
				180	increment = ( roundingMode == float_round_down ) && absZ1;
				181	}
				182	else {
				183	increment = ( roundingMode == float_round_up ) && absZ1;
				184	}
				185	}
				186	}
				187	if ( increment ) {
				188	++absZ0;
				189	if ( absZ0 == 0 ) goto overflow;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	190	absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	191	}
				192	z = absZ0;
				193	if ( zSign ) z = - z;
				194	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				195	overflow:
				196	float_raise( float_flag_invalid STATUS_VAR);
				197	return
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	198	zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	199	: LIT64( 0x7FFFFFFFFFFFFFFF );
				200	}
				201	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				202	return z;
				203
				204	}
				205
				206	/*----------------------------------------------------------------------------
				207	\| Returns the fraction bits of the single-precision floating-point value `a'.
				208	----------------------------------------------------------------------------/
				209
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	210	INLINE uint32_t extractFloat32Frac( float32 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	211	{
				212
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	213	return float32_val(a) & 0x007FFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	214
				215	}
				216
				217	/*----------------------------------------------------------------------------
				218	\| Returns the exponent bits of the single-precision floating-point value `a'.
				219	----------------------------------------------------------------------------/
				220
				221	INLINE int16 extractFloat32Exp( float32 a )
				222	{
				223
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	224	return ( float32_val(a)>>23 ) & 0xFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	225
				226	}
				227
				228	/*----------------------------------------------------------------------------
				229	\| Returns the sign bit of the single-precision floating-point value `a'.
				230	----------------------------------------------------------------------------/
				231
				232	INLINE flag extractFloat32Sign( float32 a )
				233	{
				234
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	235	return float32_val(a)>>31;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	236
				237	}
				238
				239	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	240	\| If `a' is denormal and we are in flush-to-zero mode then set the
				241	\| input-denormal exception and return zero. Otherwise just return the value.
				242	----------------------------------------------------------------------------/
				243	static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
				244	{
				245	if (STATUS(flush_inputs_to_zero)) {
				246	if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
				247	float_raise(float_flag_input_denormal STATUS_VAR);
				248	return make_float32(float32_val(a) & 0x80000000);
				249	}
				250	}
				251	return a;
				252	}
				253
				254	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	255	\| Normalizes the subnormal single-precision floating-point value represented
				256	\| by the denormalized significand `aSig'. The normalized exponent and
				257	\| significand are stored at the locations pointed to by `zExpPtr' and
				258	\| `zSigPtr', respectively.
				259	----------------------------------------------------------------------------/
				260
				261	static void
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	262	normalizeFloat32Subnormal( uint32_t aSig, int16 zExpPtr, uint32_t zSigPtr )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	263	{
				264	int8 shiftCount;
				265
				266	shiftCount = countLeadingZeros32( aSig ) - 8;
				267	*zSigPtr = aSig<<shiftCount;
				268	*zExpPtr = 1 - shiftCount;
				269
				270	}
				271
				272	/*----------------------------------------------------------------------------
				273	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				274	\| single-precision floating-point value, returning the result. After being
				275	\| shifted into the proper positions, the three fields are simply added
				276	\| together to form the result. This means that any integer portion of `zSig'
				277	\| will be added into the exponent. Since a properly normalized significand
				278	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				279	\| than the desired result exponent whenever `zSig' is a complete, normalized
				280	\| significand.
				281	----------------------------------------------------------------------------/
				282
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	283	INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	284	{
				285
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	286	return make_float32(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	287	( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	288
				289	}
				290
				291	/*----------------------------------------------------------------------------
				292	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				293	\| and significand `zSig', and returns the proper single-precision floating-
				294	\| point value corresponding to the abstract input. Ordinarily, the abstract
				295	\| value is simply rounded and packed into the single-precision format, with
				296	\| the inexact exception raised if the abstract input cannot be represented
				297	\| exactly. However, if the abstract value is too large, the overflow and
				298	\| inexact exceptions are raised and an infinity or maximal finite value is
				299	\| returned. If the abstract value is too small, the input value is rounded to
				300	\| a subnormal number, and the underflow and inexact exceptions are raised if
				301	\| the abstract input cannot be represented exactly as a subnormal single-
				302	\| precision floating-point number.
				303	\| The input significand `zSig' has its binary point between bits 30
				304	\| and 29, which is 7 bits to the left of the usual location. This shifted
				305	\| significand must be normalized or smaller. If `zSig' is not normalized,
				306	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				307	\| and it must not require rounding. In the usual case that `zSig' is
				308	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				309	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				310	\| Binary Floating-Point Arithmetic.
				311	----------------------------------------------------------------------------/
				312
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	313	static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	314	{
				315	int8 roundingMode;
				316	flag roundNearestEven;
				317	int8 roundIncrement, roundBits;
				318	flag isTiny;
				319
				320	roundingMode = STATUS(float_rounding_mode);
				321	roundNearestEven = ( roundingMode == float_round_nearest_even );
				322	roundIncrement = 0x40;
				323	if ( ! roundNearestEven ) {
				324	if ( roundingMode == float_round_to_zero ) {
				325	roundIncrement = 0;
				326	}
				327	else {
				328	roundIncrement = 0x7F;
				329	if ( zSign ) {
				330	if ( roundingMode == float_round_up ) roundIncrement = 0;
				331	}
				332	else {
				333	if ( roundingMode == float_round_down ) roundIncrement = 0;
				334	}
				335	}
				336	}
				337	roundBits = zSig & 0x7F;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	338	if ( 0xFD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	339	if ( ( 0xFD < zExp )
				340	\|\| ( ( zExp == 0xFD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	341	&& ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	342	) {
				343	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	344	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	345	}
				346	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	347	if (STATUS(flush_to_zero)) {
				348	float_raise(float_flag_output_denormal STATUS_VAR);
				349	return packFloat32(zSign, 0, 0);
				350	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	351	isTiny =
				352	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				353	\|\| ( zExp < -1 )
				354	\|\| ( zSig + roundIncrement < 0x80000000 );
				355	shift32RightJamming( zSig, - zExp, &zSig );
				356	zExp = 0;
				357	roundBits = zSig & 0x7F;
				358	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				359	}
				360	}
				361	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				362	zSig = ( zSig + roundIncrement )>>7;
				363	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				364	if ( zSig == 0 ) zExp = 0;
				365	return packFloat32( zSign, zExp, zSig );
				366
				367	}
				368
				369	/*----------------------------------------------------------------------------
				370	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				371	\| and significand `zSig', and returns the proper single-precision floating-
				372	\| point value corresponding to the abstract input. This routine is just like
				373	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				374	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				375	\| floating-point exponent.
				376	----------------------------------------------------------------------------/
				377
				378	static float32
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	379	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	380	{
				381	int8 shiftCount;
				382
				383	shiftCount = countLeadingZeros32( zSig ) - 1;
				384	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				385
				386	}
				387
				388	/*----------------------------------------------------------------------------
				389	\| Returns the fraction bits of the double-precision floating-point value `a'.
				390	----------------------------------------------------------------------------/
				391
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	392	INLINE uint64_t extractFloat64Frac( float64 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	393	{
				394
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	395	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	396
				397	}
				398
				399	/*----------------------------------------------------------------------------
				400	\| Returns the exponent bits of the double-precision floating-point value `a'.
				401	----------------------------------------------------------------------------/
				402
				403	INLINE int16 extractFloat64Exp( float64 a )
				404	{
				405
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	406	return ( float64_val(a)>>52 ) & 0x7FF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	407
				408	}
				409
				410	/*----------------------------------------------------------------------------
				411	\| Returns the sign bit of the double-precision floating-point value `a'.
				412	----------------------------------------------------------------------------/
				413
				414	INLINE flag extractFloat64Sign( float64 a )
				415	{
				416
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	417	return float64_val(a)>>63;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	418
				419	}
				420
				421	/*----------------------------------------------------------------------------
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	422	\| If `a' is denormal and we are in flush-to-zero mode then set the
				423	\| input-denormal exception and return zero. Otherwise just return the value.
				424	----------------------------------------------------------------------------/
				425	static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
				426	{
				427	if (STATUS(flush_inputs_to_zero)) {
				428	if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
				429	float_raise(float_flag_input_denormal STATUS_VAR);
				430	return make_float64(float64_val(a) & (1ULL << 63));
				431	}
				432	}
				433	return a;
				434	}
				435
				436	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	437	\| Normalizes the subnormal double-precision floating-point value represented
				438	\| by the denormalized significand `aSig'. The normalized exponent and
				439	\| significand are stored at the locations pointed to by `zExpPtr' and
				440	\| `zSigPtr', respectively.
				441	----------------------------------------------------------------------------/
				442
				443	static void
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	444	normalizeFloat64Subnormal( uint64_t aSig, int16 zExpPtr, uint64_t zSigPtr )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	445	{
				446	int8 shiftCount;
				447
				448	shiftCount = countLeadingZeros64( aSig ) - 11;
				449	*zSigPtr = aSig<<shiftCount;
				450	*zExpPtr = 1 - shiftCount;
				451
				452	}
				453
				454	/*----------------------------------------------------------------------------
				455	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				456	\| double-precision floating-point value, returning the result. After being
				457	\| shifted into the proper positions, the three fields are simply added
				458	\| together to form the result. This means that any integer portion of `zSig'
				459	\| will be added into the exponent. Since a properly normalized significand
				460	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				461	\| than the desired result exponent whenever `zSig' is a complete, normalized
				462	\| significand.
				463	----------------------------------------------------------------------------/
				464
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	465	INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	466	{
				467
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	468	return make_float64(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	469	( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	470
				471	}
				472
				473	/*----------------------------------------------------------------------------
				474	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				475	\| and significand `zSig', and returns the proper double-precision floating-
				476	\| point value corresponding to the abstract input. Ordinarily, the abstract
				477	\| value is simply rounded and packed into the double-precision format, with
				478	\| the inexact exception raised if the abstract input cannot be represented
				479	\| exactly. However, if the abstract value is too large, the overflow and
				480	\| inexact exceptions are raised and an infinity or maximal finite value is
				481	\| returned. If the abstract value is too small, the input value is rounded
				482	\| to a subnormal number, and the underflow and inexact exceptions are raised
				483	\| if the abstract input cannot be represented exactly as a subnormal double-
				484	\| precision floating-point number.
				485	\| The input significand `zSig' has its binary point between bits 62
				486	\| and 61, which is 10 bits to the left of the usual location. This shifted
				487	\| significand must be normalized or smaller. If `zSig' is not normalized,
				488	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				489	\| and it must not require rounding. In the usual case that `zSig' is
				490	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				491	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				492	\| Binary Floating-Point Arithmetic.
				493	----------------------------------------------------------------------------/
				494
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	495	static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	496	{
				497	int8 roundingMode;
				498	flag roundNearestEven;
				499	int16 roundIncrement, roundBits;
				500	flag isTiny;
				501
				502	roundingMode = STATUS(float_rounding_mode);
				503	roundNearestEven = ( roundingMode == float_round_nearest_even );
				504	roundIncrement = 0x200;
				505	if ( ! roundNearestEven ) {
				506	if ( roundingMode == float_round_to_zero ) {
				507	roundIncrement = 0;
				508	}
				509	else {
				510	roundIncrement = 0x3FF;
				511	if ( zSign ) {
				512	if ( roundingMode == float_round_up ) roundIncrement = 0;
				513	}
				514	else {
				515	if ( roundingMode == float_round_down ) roundIncrement = 0;
				516	}
				517	}
				518	}
				519	roundBits = zSig & 0x3FF;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	520	if ( 0x7FD <= (uint16_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	521	if ( ( 0x7FD < zExp )
				522	\|\| ( ( zExp == 0x7FD )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	523	&& ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	524	) {
				525	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	526	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	527	}
				528	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	529	if (STATUS(flush_to_zero)) {
				530	float_raise(float_flag_output_denormal STATUS_VAR);
				531	return packFloat64(zSign, 0, 0);
				532	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	533	isTiny =
				534	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				535	\|\| ( zExp < -1 )
				536	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				537	shift64RightJamming( zSig, - zExp, &zSig );
				538	zExp = 0;
				539	roundBits = zSig & 0x3FF;
				540	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				541	}
				542	}
				543	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				544	zSig = ( zSig + roundIncrement )>>10;
				545	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				546	if ( zSig == 0 ) zExp = 0;
				547	return packFloat64( zSign, zExp, zSig );
				548
				549	}
				550
				551	/*----------------------------------------------------------------------------
				552	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				553	\| and significand `zSig', and returns the proper double-precision floating-
				554	\| point value corresponding to the abstract input. This routine is just like
				555	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				556	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				557	\| floating-point exponent.
				558	----------------------------------------------------------------------------/
				559
				560	static float64
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	561	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	562	{
				563	int8 shiftCount;
				564
				565	shiftCount = countLeadingZeros64( zSig ) - 1;
				566	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				567
				568	}
				569
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	570	/*----------------------------------------------------------------------------
				571	\| Returns the fraction bits of the extended double-precision floating-point
				572	\| value `a'.
				573	----------------------------------------------------------------------------/
				574
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	575	INLINE uint64_t extractFloatx80Frac( floatx80 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	576	{
				577
				578	return a.low;
				579
				580	}
				581
				582	/*----------------------------------------------------------------------------
				583	\| Returns the exponent bits of the extended double-precision floating-point
				584	\| value `a'.
				585	----------------------------------------------------------------------------/
				586
				587	INLINE int32 extractFloatx80Exp( floatx80 a )
				588	{
				589
				590	return a.high & 0x7FFF;
				591
				592	}
				593
				594	/*----------------------------------------------------------------------------
				595	\| Returns the sign bit of the extended double-precision floating-point value
				596	\| `a'.
				597	----------------------------------------------------------------------------/
				598
				599	INLINE flag extractFloatx80Sign( floatx80 a )
				600	{
				601
				602	return a.high>>15;
				603
				604	}
				605
				606	/*----------------------------------------------------------------------------
				607	\| Normalizes the subnormal extended double-precision floating-point value
				608	\| represented by the denormalized significand `aSig'. The normalized exponent
				609	\| and significand are stored at the locations pointed to by `zExpPtr' and
				610	\| `zSigPtr', respectively.
				611	----------------------------------------------------------------------------/
				612
				613	static void
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	614	normalizeFloatx80Subnormal( uint64_t aSig, int32 zExpPtr, uint64_t zSigPtr )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	615	{
				616	int8 shiftCount;
				617
				618	shiftCount = countLeadingZeros64( aSig );
				619	*zSigPtr = aSig<<shiftCount;
				620	*zExpPtr = 1 - shiftCount;
				621
				622	}
				623
				624	/*----------------------------------------------------------------------------
				625	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				626	\| extended double-precision floating-point value, returning the result.
				627	----------------------------------------------------------------------------/
				628
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	629	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	630	{
				631	floatx80 z;
				632
				633	z.low = zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	634	z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	635	return z;
				636
				637	}
				638
				639	/*----------------------------------------------------------------------------
				640	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				641	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				642	\| and returns the proper extended double-precision floating-point value
				643	\| corresponding to the abstract input. Ordinarily, the abstract value is
				644	\| rounded and packed into the extended double-precision format, with the
				645	\| inexact exception raised if the abstract input cannot be represented
				646	\| exactly. However, if the abstract value is too large, the overflow and
				647	\| inexact exceptions are raised and an infinity or maximal finite value is
				648	\| returned. If the abstract value is too small, the input value is rounded to
				649	\| a subnormal number, and the underflow and inexact exceptions are raised if
				650	\| the abstract input cannot be represented exactly as a subnormal extended
				651	\| double-precision floating-point number.
				652	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				653	\| number of bits as single or double precision, respectively. Otherwise, the
				654	\| result is rounded to the full precision of the extended double-precision
				655	\| format.
				656	\| The input significand must be normalized or smaller. If the input
				657	\| significand is not normalized, `zExp' must be 0; in that case, the result
				658	\| returned is a subnormal number, and it must not require rounding. The
				659	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				660	\| Floating-Point Arithmetic.
				661	----------------------------------------------------------------------------/
				662
				663	static floatx80
				664	roundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	665	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	666	STATUS_PARAM)
				667	{
				668	int8 roundingMode;
				669	flag roundNearestEven, increment, isTiny;
				670	int64 roundIncrement, roundMask, roundBits;
				671
				672	roundingMode = STATUS(float_rounding_mode);
				673	roundNearestEven = ( roundingMode == float_round_nearest_even );
				674	if ( roundingPrecision == 80 ) goto precision80;
				675	if ( roundingPrecision == 64 ) {
				676	roundIncrement = LIT64( 0x0000000000000400 );
				677	roundMask = LIT64( 0x00000000000007FF );
				678	}
				679	else if ( roundingPrecision == 32 ) {
				680	roundIncrement = LIT64( 0x0000008000000000 );
				681	roundMask = LIT64( 0x000000FFFFFFFFFF );
				682	}
				683	else {
				684	goto precision80;
				685	}
				686	zSig0 \|= ( zSig1 != 0 );
				687	if ( ! roundNearestEven ) {
				688	if ( roundingMode == float_round_to_zero ) {
				689	roundIncrement = 0;
				690	}
				691	else {
				692	roundIncrement = roundMask;
				693	if ( zSign ) {
				694	if ( roundingMode == float_round_up ) roundIncrement = 0;
				695	}
				696	else {
				697	if ( roundingMode == float_round_down ) roundIncrement = 0;
				698	}
				699	}
				700	}
				701	roundBits = zSig0 & roundMask;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	702	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	703	if ( ( 0x7FFE < zExp )
				704	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				705	) {
				706	goto overflow;
				707	}
				708	if ( zExp <= 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	709	if (STATUS(flush_to_zero)) {
				710	float_raise(float_flag_output_denormal STATUS_VAR);
				711	return packFloatx80(zSign, 0, 0);
				712	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	713	isTiny =
				714	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				715	\|\| ( zExp < 0 )
				716	\|\| ( zSig0 <= zSig0 + roundIncrement );
				717	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				718	zExp = 0;
				719	roundBits = zSig0 & roundMask;
				720	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				721	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				722	zSig0 += roundIncrement;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	723	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	724	roundIncrement = roundMask + 1;
				725	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				726	roundMask \|= roundIncrement;
				727	}
				728	zSig0 &= ~ roundMask;
				729	return packFloatx80( zSign, zExp, zSig0 );
				730	}
				731	}
				732	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				733	zSig0 += roundIncrement;
				734	if ( zSig0 < roundIncrement ) {
				735	++zExp;
				736	zSig0 = LIT64( 0x8000000000000000 );
				737	}
				738	roundIncrement = roundMask + 1;
				739	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				740	roundMask \|= roundIncrement;
				741	}
				742	zSig0 &= ~ roundMask;
				743	if ( zSig0 == 0 ) zExp = 0;
				744	return packFloatx80( zSign, zExp, zSig0 );
				745	precision80:
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	746	increment = ( (int64_t) zSig1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	747	if ( ! roundNearestEven ) {
				748	if ( roundingMode == float_round_to_zero ) {
				749	increment = 0;
				750	}
				751	else {
				752	if ( zSign ) {
				753	increment = ( roundingMode == float_round_down ) && zSig1;
				754	}
				755	else {
				756	increment = ( roundingMode == float_round_up ) && zSig1;
				757	}
				758	}
				759	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	760	if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	761	if ( ( 0x7FFE < zExp )
				762	\|\| ( ( zExp == 0x7FFE )
				763	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				764	&& increment
				765	)
				766	) {
				767	roundMask = 0;
				768	overflow:
				769	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				770	if ( ( roundingMode == float_round_to_zero )
				771	\|\| ( zSign && ( roundingMode == float_round_up ) )
				772	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				773	) {
				774	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				775	}
				776	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				777	}
				778	if ( zExp <= 0 ) {
				779	isTiny =
				780	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				781	\|\| ( zExp < 0 )
				782	\|\| ! increment
				783	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				784	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				785	zExp = 0;
				786	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				787	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				788	if ( roundNearestEven ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	789	increment = ( (int64_t) zSig1 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	790	}
				791	else {
				792	if ( zSign ) {
				793	increment = ( roundingMode == float_round_down ) && zSig1;
				794	}
				795	else {
				796	increment = ( roundingMode == float_round_up ) && zSig1;
				797	}
				798	}
				799	if ( increment ) {
				800	++zSig0;
				801	zSig0 &=
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	802	~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				803	if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	804	}
				805	return packFloatx80( zSign, zExp, zSig0 );
				806	}
				807	}
				808	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				809	if ( increment ) {
				810	++zSig0;
				811	if ( zSig0 == 0 ) {
				812	++zExp;
				813	zSig0 = LIT64( 0x8000000000000000 );
				814	}
				815	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	816	zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	817	}
				818	}
				819	else {
				820	if ( zSig0 == 0 ) zExp = 0;
				821	}
				822	return packFloatx80( zSign, zExp, zSig0 );
				823
				824	}
				825
				826	/*----------------------------------------------------------------------------
				827	\| Takes an abstract floating-point value having sign `zSign', exponent
				828	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				829	\| and returns the proper extended double-precision floating-point value
				830	\| corresponding to the abstract input. This routine is just like
				831	\| `roundAndPackFloatx80' except that the input significand does not have to be
				832	\| normalized.
				833	----------------------------------------------------------------------------/
				834
				835	static floatx80
				836	normalizeRoundAndPackFloatx80(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	837	int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	838	STATUS_PARAM)
				839	{
				840	int8 shiftCount;
				841
				842	if ( zSig0 == 0 ) {
				843	zSig0 = zSig1;
				844	zSig1 = 0;
				845	zExp -= 64;
				846	}
				847	shiftCount = countLeadingZeros64( zSig0 );
				848	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				849	zExp -= shiftCount;
				850	return
				851	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				852
				853	}
				854
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	855	/*----------------------------------------------------------------------------
				856	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				857	\| floating-point value `a'.
				858	----------------------------------------------------------------------------/
				859
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	860	INLINE uint64_t extractFloat128Frac1( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	861	{
				862
				863	return a.low;
				864
				865	}
				866
				867	/*----------------------------------------------------------------------------
				868	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				869	\| floating-point value `a'.
				870	----------------------------------------------------------------------------/
				871
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	872	INLINE uint64_t extractFloat128Frac0( float128 a )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	873	{
				874
				875	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				876
				877	}
				878
				879	/*----------------------------------------------------------------------------
				880	\| Returns the exponent bits of the quadruple-precision floating-point value
				881	\| `a'.
				882	----------------------------------------------------------------------------/
				883
				884	INLINE int32 extractFloat128Exp( float128 a )
				885	{
				886
				887	return ( a.high>>48 ) & 0x7FFF;
				888
				889	}
				890
				891	/*----------------------------------------------------------------------------
				892	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				893	----------------------------------------------------------------------------/
				894
				895	INLINE flag extractFloat128Sign( float128 a )
				896	{
				897
				898	return a.high>>63;
				899
				900	}
				901
				902	/*----------------------------------------------------------------------------
				903	\| Normalizes the subnormal quadruple-precision floating-point value
				904	\| represented by the denormalized significand formed by the concatenation of
				905	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				906	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				907	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				908	\| least significant 64 bits of the normalized significand are stored at the
				909	\| location pointed to by `zSig1Ptr'.
				910	----------------------------------------------------------------------------/
				911
				912	static void
				913	normalizeFloat128Subnormal(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	914	uint64_t aSig0,
				915	uint64_t aSig1,
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	916	int32 *zExpPtr,
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	917	uint64_t *zSig0Ptr,
				918	uint64_t *zSig1Ptr
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	919	)
				920	{
				921	int8 shiftCount;
				922
				923	if ( aSig0 == 0 ) {
				924	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				925	if ( shiftCount < 0 ) {
				926	*zSig0Ptr = aSig1>>( - shiftCount );
				927	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				928	}
				929	else {
				930	*zSig0Ptr = aSig1<<shiftCount;
				931	*zSig1Ptr = 0;
				932	}
				933	*zExpPtr = - shiftCount - 63;
				934	}
				935	else {
				936	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				937	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				938	*zExpPtr = 1 - shiftCount;
				939	}
				940
				941	}
				942
				943	/*----------------------------------------------------------------------------
				944	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				945	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				946	\| floating-point value, returning the result. After being shifted into the
				947	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				948	\| added together to form the most significant 32 bits of the result. This
				949	\| means that any integer portion of `zSig0' will be added into the exponent.
				950	\| Since a properly normalized significand will have an integer portion equal
				951	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				952	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				953	\| significand.
				954	----------------------------------------------------------------------------/
				955
				956	INLINE float128
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	957	packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	958	{
				959	float128 z;
				960
				961	z.low = zSig1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	962	z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	963	return z;
				964
				965	}
				966
				967	/*----------------------------------------------------------------------------
				968	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				969	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				970	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				971	\| corresponding to the abstract input. Ordinarily, the abstract value is
				972	\| simply rounded and packed into the quadruple-precision format, with the
				973	\| inexact exception raised if the abstract input cannot be represented
				974	\| exactly. However, if the abstract value is too large, the overflow and
				975	\| inexact exceptions are raised and an infinity or maximal finite value is
				976	\| returned. If the abstract value is too small, the input value is rounded to
				977	\| a subnormal number, and the underflow and inexact exceptions are raised if
				978	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				979	\| precision floating-point number.
				980	\| The input significand must be normalized or smaller. If the input
				981	\| significand is not normalized, `zExp' must be 0; in that case, the result
				982	\| returned is a subnormal number, and it must not require rounding. In the
				983	\| usual case that the input significand is normalized, `zExp' must be 1 less
				984	\| than the ``true'' floating-point exponent. The handling of underflow and
				985	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				986	----------------------------------------------------------------------------/
				987
				988	static float128
				989	roundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	990	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	991	{
				992	int8 roundingMode;
				993	flag roundNearestEven, increment, isTiny;
				994
				995	roundingMode = STATUS(float_rounding_mode);
				996	roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	997	increment = ( (int64_t) zSig2 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	998	if ( ! roundNearestEven ) {
				999	if ( roundingMode == float_round_to_zero ) {
				1000	increment = 0;
				1001	}
				1002	else {
				1003	if ( zSign ) {
				1004	increment = ( roundingMode == float_round_down ) && zSig2;
				1005	}
				1006	else {
				1007	increment = ( roundingMode == float_round_up ) && zSig2;
				1008	}
				1009	}
				1010	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1011	if ( 0x7FFD <= (uint32_t) zExp ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1012	if ( ( 0x7FFD < zExp )
				1013	\|\| ( ( zExp == 0x7FFD )
				1014	&& eq128(
				1015	LIT64( 0x0001FFFFFFFFFFFF ),
				1016	LIT64( 0xFFFFFFFFFFFFFFFF ),
				1017	zSig0,
				1018	zSig1
				1019	)
				1020	&& increment
				1021	)
				1022	) {
				1023	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				1024	if ( ( roundingMode == float_round_to_zero )
				1025	\|\| ( zSign && ( roundingMode == float_round_up ) )
				1026	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				1027	) {
				1028	return
				1029	packFloat128(
				1030	zSign,
				1031	0x7FFE,
				1032	LIT64( 0x0000FFFFFFFFFFFF ),
				1033	LIT64( 0xFFFFFFFFFFFFFFFF )
				1034	);
				1035	}
				1036	return packFloat128( zSign, 0x7FFF, 0, 0 );
				1037	}
				1038	if ( zExp < 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1039	if (STATUS(flush_to_zero)) {
				1040	float_raise(float_flag_output_denormal STATUS_VAR);
				1041	return packFloat128(zSign, 0, 0, 0);
				1042	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1043	isTiny =
				1044	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				1045	\|\| ( zExp < -1 )
				1046	\|\| ! increment
				1047	\|\| lt128(
				1048	zSig0,
				1049	zSig1,
				1050	LIT64( 0x0001FFFFFFFFFFFF ),
				1051	LIT64( 0xFFFFFFFFFFFFFFFF )
				1052	);
				1053	shift128ExtraRightJamming(
				1054	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				1055	zExp = 0;
				1056	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
				1057	if ( roundNearestEven ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1058	increment = ( (int64_t) zSig2 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1059	}
				1060	else {
				1061	if ( zSign ) {
				1062	increment = ( roundingMode == float_round_down ) && zSig2;
				1063	}
				1064	else {
				1065	increment = ( roundingMode == float_round_up ) && zSig2;
				1066	}
				1067	}
				1068	}
				1069	}
				1070	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1071	if ( increment ) {
				1072	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				1073	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				1074	}
				1075	else {
				1076	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1077	}
				1078	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1079
				1080	}
				1081
				1082	/*----------------------------------------------------------------------------
				1083	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1084	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1085	\| returns the proper quadruple-precision floating-point value corresponding
				1086	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1087	\| except that the input significand has fewer bits and does not have to be
				1088	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1089	\| point exponent.
				1090	----------------------------------------------------------------------------/
				1091
				1092	static float128
				1093	normalizeRoundAndPackFloat128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1094	flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1095	{
				1096	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1097	uint64_t zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1098
				1099	if ( zSig0 == 0 ) {
				1100	zSig0 = zSig1;
				1101	zSig1 = 0;
				1102	zExp -= 64;
				1103	}
				1104	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1105	if ( 0 <= shiftCount ) {
				1106	zSig2 = 0;
				1107	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1108	}
				1109	else {
				1110	shift128ExtraRightJamming(
				1111	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1112	}
				1113	zExp -= shiftCount;
				1114	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1115
				1116	}
				1117
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1118	/*----------------------------------------------------------------------------
				1119	\| Returns the result of converting the 32-bit two's complement integer `a'
				1120	\| to the single-precision floating-point format. The conversion is performed
				1121	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1122	----------------------------------------------------------------------------/
				1123
				1124	float32 int32_to_float32( int32 a STATUS_PARAM )
				1125	{
				1126	flag zSign;
				1127
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1128	if ( a == 0 ) return float32_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1129	if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1130	zSign = ( a < 0 );
				1131	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1132
				1133	}
				1134
				1135	/*----------------------------------------------------------------------------
				1136	\| Returns the result of converting the 32-bit two's complement integer `a'
				1137	\| to the double-precision floating-point format. The conversion is performed
				1138	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1139	----------------------------------------------------------------------------/
				1140
				1141	float64 int32_to_float64( int32 a STATUS_PARAM )
				1142	{
				1143	flag zSign;
				1144	uint32 absA;
				1145	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1146	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1147
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1148	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1149	zSign = ( a < 0 );
				1150	absA = zSign ? - a : a;
				1151	shiftCount = countLeadingZeros32( absA ) + 21;
				1152	zSig = absA;
				1153	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1154
				1155	}
				1156
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1157	/*----------------------------------------------------------------------------
				1158	\| Returns the result of converting the 32-bit two's complement integer `a'
				1159	\| to the extended double-precision floating-point format. The conversion
				1160	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1161	\| Arithmetic.
				1162	----------------------------------------------------------------------------/
				1163
				1164	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
				1165	{
				1166	flag zSign;
				1167	uint32 absA;
				1168	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1169	uint64_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1170
				1171	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1172	zSign = ( a < 0 );
				1173	absA = zSign ? - a : a;
				1174	shiftCount = countLeadingZeros32( absA ) + 32;
				1175	zSig = absA;
				1176	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1177
				1178	}
				1179
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1180	/*----------------------------------------------------------------------------
				1181	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1182	\| the quadruple-precision floating-point format. The conversion is performed
				1183	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1184	----------------------------------------------------------------------------/
				1185
				1186	float128 int32_to_float128( int32 a STATUS_PARAM )
				1187	{
				1188	flag zSign;
				1189	uint32 absA;
				1190	int8 shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1191	uint64_t zSig0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1192
				1193	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1194	zSign = ( a < 0 );
				1195	absA = zSign ? - a : a;
				1196	shiftCount = countLeadingZeros32( absA ) + 17;
				1197	zSig0 = absA;
				1198	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1199
				1200	}
				1201
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1202	/*----------------------------------------------------------------------------
				1203	\| Returns the result of converting the 64-bit two's complement integer `a'
				1204	\| to the single-precision floating-point format. The conversion is performed
				1205	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1206	----------------------------------------------------------------------------/
				1207
				1208	float32 int64_to_float32( int64 a STATUS_PARAM )
				1209	{
				1210	flag zSign;
				1211	uint64 absA;
				1212	int8 shiftCount;
				1213
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1214	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1215	zSign = ( a < 0 );
				1216	absA = zSign ? - a : a;
				1217	shiftCount = countLeadingZeros64( absA ) - 40;
				1218	if ( 0 <= shiftCount ) {
				1219	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1220	}
				1221	else {
				1222	shiftCount += 7;
				1223	if ( shiftCount < 0 ) {
				1224	shift64RightJamming( absA, - shiftCount, &absA );
				1225	}
				1226	else {
				1227	absA <<= shiftCount;
				1228	}
				1229	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1230	}
				1231
				1232	}
				1233
j_mayer	3430b0b	2007-03-20 22:25:37 +0000	[diff] [blame]	1234	float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1235	{
				1236	int8 shiftCount;
				1237
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1238	if ( a == 0 ) return float32_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1239	shiftCount = countLeadingZeros64( a ) - 40;
				1240	if ( 0 <= shiftCount ) {
				1241	return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
				1242	}
				1243	else {
				1244	shiftCount += 7;
				1245	if ( shiftCount < 0 ) {
				1246	shift64RightJamming( a, - shiftCount, &a );
				1247	}
				1248	else {
				1249	a <<= shiftCount;
				1250	}
				1251	return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
				1252	}
				1253	}
				1254
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1255	/*----------------------------------------------------------------------------
				1256	\| Returns the result of converting the 64-bit two's complement integer `a'
				1257	\| to the double-precision floating-point format. The conversion is performed
				1258	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1259	----------------------------------------------------------------------------/
				1260
				1261	float64 int64_to_float64( int64 a STATUS_PARAM )
				1262	{
				1263	flag zSign;
				1264
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1265	if ( a == 0 ) return float64_zero;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1266	if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1267	return packFloat64( 1, 0x43E, 0 );
				1268	}
				1269	zSign = ( a < 0 );
				1270	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1271
				1272	}
				1273
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1274	float64 uint64_to_float64( uint64 a STATUS_PARAM )
				1275	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1276	if ( a == 0 ) return float64_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1277	return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
				1278
				1279	}
				1280
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1281	/*----------------------------------------------------------------------------
				1282	\| Returns the result of converting the 64-bit two's complement integer `a'
				1283	\| to the extended double-precision floating-point format. The conversion
				1284	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1285	\| Arithmetic.
				1286	----------------------------------------------------------------------------/
				1287
				1288	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
				1289	{
				1290	flag zSign;
				1291	uint64 absA;
				1292	int8 shiftCount;
				1293
				1294	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1295	zSign = ( a < 0 );
				1296	absA = zSign ? - a : a;
				1297	shiftCount = countLeadingZeros64( absA );
				1298	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1299
				1300	}
				1301
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1302	/*----------------------------------------------------------------------------
				1303	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1304	\| the quadruple-precision floating-point format. The conversion is performed
				1305	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1306	----------------------------------------------------------------------------/
				1307
				1308	float128 int64_to_float128( int64 a STATUS_PARAM )
				1309	{
				1310	flag zSign;
				1311	uint64 absA;
				1312	int8 shiftCount;
				1313	int32 zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1314	uint64_t zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1315
				1316	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1317	zSign = ( a < 0 );
				1318	absA = zSign ? - a : a;
				1319	shiftCount = countLeadingZeros64( absA ) + 49;
				1320	zExp = 0x406E - shiftCount;
				1321	if ( 64 <= shiftCount ) {
				1322	zSig1 = 0;
				1323	zSig0 = absA;
				1324	shiftCount -= 64;
				1325	}
				1326	else {
				1327	zSig1 = absA;
				1328	zSig0 = 0;
				1329	}
				1330	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1331	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1332
				1333	}
				1334
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1335	/*----------------------------------------------------------------------------
				1336	\| Returns the result of converting the single-precision floating-point value
				1337	\| `a' to the 32-bit two's complement integer format. The conversion is
				1338	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1339	\| Arithmetic---which means in particular that the conversion is rounded
				1340	\| according to the current rounding mode. If `a' is a NaN, the largest
				1341	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1342	\| largest integer with the same sign as `a' is returned.
				1343	----------------------------------------------------------------------------/
				1344
				1345	int32 float32_to_int32( float32 a STATUS_PARAM )
				1346	{
				1347	flag aSign;
				1348	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1349	uint32_t aSig;
				1350	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1351
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1352	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1353	aSig = extractFloat32Frac( a );
				1354	aExp = extractFloat32Exp( a );
				1355	aSign = extractFloat32Sign( a );
				1356	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1357	if ( aExp ) aSig \|= 0x00800000;
				1358	shiftCount = 0xAF - aExp;
				1359	aSig64 = aSig;
				1360	aSig64 <<= 32;
				1361	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1362	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1363
				1364	}
				1365
				1366	/*----------------------------------------------------------------------------
				1367	\| Returns the result of converting the single-precision floating-point value
				1368	\| `a' to the 32-bit two's complement integer format. The conversion is
				1369	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1370	\| Arithmetic, except that the conversion is always rounded toward zero.
				1371	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1372	\| the conversion overflows, the largest integer with the same sign as `a' is
				1373	\| returned.
				1374	----------------------------------------------------------------------------/
				1375
				1376	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1377	{
				1378	flag aSign;
				1379	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1380	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1381	int32 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1382	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1383
				1384	aSig = extractFloat32Frac( a );
				1385	aExp = extractFloat32Exp( a );
				1386	aSign = extractFloat32Sign( a );
				1387	shiftCount = aExp - 0x9E;
				1388	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1389	if ( float32_val(a) != 0xCF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1390	float_raise( float_flag_invalid STATUS_VAR);
				1391	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1392	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1393	return (int32_t) 0x80000000;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1394	}
				1395	else if ( aExp <= 0x7E ) {
				1396	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1397	return 0;
				1398	}
				1399	aSig = ( aSig \| 0x00800000 )<<8;
				1400	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1401	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1402	STATUS(float_exception_flags) \|= float_flag_inexact;
				1403	}
				1404	if ( aSign ) z = - z;
				1405	return z;
				1406
				1407	}
				1408
				1409	/*----------------------------------------------------------------------------
				1410	\| Returns the result of converting the single-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1411	\| `a' to the 16-bit two's complement integer format. The conversion is
				1412	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1413	\| Arithmetic, except that the conversion is always rounded toward zero.
				1414	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1415	\| the conversion overflows, the largest integer with the same sign as `a' is
				1416	\| returned.
				1417	----------------------------------------------------------------------------/
				1418
				1419	int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
				1420	{
				1421	flag aSign;
				1422	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1423	uint32_t aSig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1424	int32 z;
				1425
				1426	aSig = extractFloat32Frac( a );
				1427	aExp = extractFloat32Exp( a );
				1428	aSign = extractFloat32Sign( a );
				1429	shiftCount = aExp - 0x8E;
				1430	if ( 0 <= shiftCount ) {
				1431	if ( float32_val(a) != 0xC7000000 ) {
				1432	float_raise( float_flag_invalid STATUS_VAR);
				1433	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1434	return 0x7FFF;
				1435	}
				1436	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1437	return (int32_t) 0xffff8000;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1438	}
				1439	else if ( aExp <= 0x7E ) {
				1440	if ( aExp \| aSig ) {
				1441	STATUS(float_exception_flags) \|= float_flag_inexact;
				1442	}
				1443	return 0;
				1444	}
				1445	shiftCount -= 0x10;
				1446	aSig = ( aSig \| 0x00800000 )<<8;
				1447	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1448	if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	1449	STATUS(float_exception_flags) \|= float_flag_inexact;
				1450	}
				1451	if ( aSign ) {
				1452	z = - z;
				1453	}
				1454	return z;
				1455
				1456	}
				1457
				1458	/*----------------------------------------------------------------------------
				1459	\| Returns the result of converting the single-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1460	\| `a' to the 64-bit two's complement integer format. The conversion is
				1461	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1462	\| Arithmetic---which means in particular that the conversion is rounded
				1463	\| according to the current rounding mode. If `a' is a NaN, the largest
				1464	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1465	\| largest integer with the same sign as `a' is returned.
				1466	----------------------------------------------------------------------------/
				1467
				1468	int64 float32_to_int64( float32 a STATUS_PARAM )
				1469	{
				1470	flag aSign;
				1471	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1472	uint32_t aSig;
				1473	uint64_t aSig64, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1474	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1475
				1476	aSig = extractFloat32Frac( a );
				1477	aExp = extractFloat32Exp( a );
				1478	aSign = extractFloat32Sign( a );
				1479	shiftCount = 0xBE - aExp;
				1480	if ( shiftCount < 0 ) {
				1481	float_raise( float_flag_invalid STATUS_VAR);
				1482	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1483	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1484	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1485	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1486	}
				1487	if ( aExp ) aSig \|= 0x00800000;
				1488	aSig64 = aSig;
				1489	aSig64 <<= 40;
				1490	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1491	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1492
				1493	}
				1494
				1495	/*----------------------------------------------------------------------------
				1496	\| Returns the result of converting the single-precision floating-point value
				1497	\| `a' to the 64-bit two's complement integer format. The conversion is
				1498	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1499	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1500	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1501	\| conversion overflows, the largest integer with the same sign as `a' is
				1502	\| returned.
				1503	----------------------------------------------------------------------------/
				1504
				1505	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1506	{
				1507	flag aSign;
				1508	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1509	uint32_t aSig;
				1510	uint64_t aSig64;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1511	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1512	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1513
				1514	aSig = extractFloat32Frac( a );
				1515	aExp = extractFloat32Exp( a );
				1516	aSign = extractFloat32Sign( a );
				1517	shiftCount = aExp - 0xBE;
				1518	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1519	if ( float32_val(a) != 0xDF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1520	float_raise( float_flag_invalid STATUS_VAR);
				1521	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1522	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1523	}
				1524	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1525	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1526	}
				1527	else if ( aExp <= 0x7E ) {
				1528	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1529	return 0;
				1530	}
				1531	aSig64 = aSig \| 0x00800000;
				1532	aSig64 <<= 40;
				1533	z = aSig64>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1534	if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1535	STATUS(float_exception_flags) \|= float_flag_inexact;
				1536	}
				1537	if ( aSign ) z = - z;
				1538	return z;
				1539
				1540	}
				1541
				1542	/*----------------------------------------------------------------------------
				1543	\| Returns the result of converting the single-precision floating-point value
				1544	\| `a' to the double-precision floating-point format. The conversion is
				1545	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1546	\| Arithmetic.
				1547	----------------------------------------------------------------------------/
				1548
				1549	float64 float32_to_float64( float32 a STATUS_PARAM )
				1550	{
				1551	flag aSign;
				1552	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1553	uint32_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1554	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1555
				1556	aSig = extractFloat32Frac( a );
				1557	aExp = extractFloat32Exp( a );
				1558	aSign = extractFloat32Sign( a );
				1559	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1560	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1561	return packFloat64( aSign, 0x7FF, 0 );
				1562	}
				1563	if ( aExp == 0 ) {
				1564	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1565	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1566	--aExp;
				1567	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1568	return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1569
				1570	}
				1571
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1572	/*----------------------------------------------------------------------------
				1573	\| Returns the result of converting the single-precision floating-point value
				1574	\| `a' to the extended double-precision floating-point format. The conversion
				1575	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1576	\| Arithmetic.
				1577	----------------------------------------------------------------------------/
				1578
				1579	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1580	{
				1581	flag aSign;
				1582	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1583	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1584
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1585	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1586	aSig = extractFloat32Frac( a );
				1587	aExp = extractFloat32Exp( a );
				1588	aSign = extractFloat32Sign( a );
				1589	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1590	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1591	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1592	}
				1593	if ( aExp == 0 ) {
				1594	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1595	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1596	}
				1597	aSig \|= 0x00800000;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1598	return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1599
				1600	}
				1601
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1602	/*----------------------------------------------------------------------------
				1603	\| Returns the result of converting the single-precision floating-point value
				1604	\| `a' to the double-precision floating-point format. The conversion is
				1605	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1606	\| Arithmetic.
				1607	----------------------------------------------------------------------------/
				1608
				1609	float128 float32_to_float128( float32 a STATUS_PARAM )
				1610	{
				1611	flag aSign;
				1612	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1613	uint32_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1614
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1615	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1616	aSig = extractFloat32Frac( a );
				1617	aExp = extractFloat32Exp( a );
				1618	aSign = extractFloat32Sign( a );
				1619	if ( aExp == 0xFF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	1620	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1621	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1622	}
				1623	if ( aExp == 0 ) {
				1624	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1625	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1626	--aExp;
				1627	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1628	return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1629
				1630	}
				1631
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1632	/*----------------------------------------------------------------------------
				1633	\| Rounds the single-precision floating-point value `a' to an integer, and
				1634	\| returns the result as a single-precision floating-point value. The
				1635	\| operation is performed according to the IEC/IEEE Standard for Binary
				1636	\| Floating-Point Arithmetic.
				1637	----------------------------------------------------------------------------/
				1638
				1639	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1640	{
				1641	flag aSign;
				1642	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1643	uint32_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1644	int8 roundingMode;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1645	uint32_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1646	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1647
				1648	aExp = extractFloat32Exp( a );
				1649	if ( 0x96 <= aExp ) {
				1650	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1651	return propagateFloat32NaN( a, a STATUS_VAR );
				1652	}
				1653	return a;
				1654	}
				1655	if ( aExp <= 0x7E ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1656	if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1657	STATUS(float_exception_flags) \|= float_flag_inexact;
				1658	aSign = extractFloat32Sign( a );
				1659	switch ( STATUS(float_rounding_mode) ) {
				1660	case float_round_nearest_even:
				1661	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1662	return packFloat32( aSign, 0x7F, 0 );
				1663	}
				1664	break;
				1665	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1666	return make_float32(aSign ? 0xBF800000 : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1667	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1668	return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1669	}
				1670	return packFloat32( aSign, 0, 0 );
				1671	}
				1672	lastBitMask = 1;
				1673	lastBitMask <<= 0x96 - aExp;
				1674	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1675	z = float32_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1676	roundingMode = STATUS(float_rounding_mode);
				1677	if ( roundingMode == float_round_nearest_even ) {
				1678	z += lastBitMask>>1;
				1679	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1680	}
				1681	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1682	if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1683	z += roundBitsMask;
				1684	}
				1685	}
				1686	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1687	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1688	return make_float32(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1689
				1690	}
				1691
				1692	/*----------------------------------------------------------------------------
				1693	\| Returns the result of adding the absolute values of the single-precision
				1694	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1695	\| before being returned. `zSign' is ignored if the result is a NaN.
				1696	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1697	\| Floating-Point Arithmetic.
				1698	----------------------------------------------------------------------------/
				1699
				1700	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1701	{
				1702	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1703	uint32_t aSig, bSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1704	int16 expDiff;
				1705
				1706	aSig = extractFloat32Frac( a );
				1707	aExp = extractFloat32Exp( a );
				1708	bSig = extractFloat32Frac( b );
				1709	bExp = extractFloat32Exp( b );
				1710	expDiff = aExp - bExp;
				1711	aSig <<= 6;
				1712	bSig <<= 6;
				1713	if ( 0 < expDiff ) {
				1714	if ( aExp == 0xFF ) {
				1715	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1716	return a;
				1717	}
				1718	if ( bExp == 0 ) {
				1719	--expDiff;
				1720	}
				1721	else {
				1722	bSig \|= 0x20000000;
				1723	}
				1724	shift32RightJamming( bSig, expDiff, &bSig );
				1725	zExp = aExp;
				1726	}
				1727	else if ( expDiff < 0 ) {
				1728	if ( bExp == 0xFF ) {
				1729	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1730	return packFloat32( zSign, 0xFF, 0 );
				1731	}
				1732	if ( aExp == 0 ) {
				1733	++expDiff;
				1734	}
				1735	else {
				1736	aSig \|= 0x20000000;
				1737	}
				1738	shift32RightJamming( aSig, - expDiff, &aSig );
				1739	zExp = bExp;
				1740	}
				1741	else {
				1742	if ( aExp == 0xFF ) {
				1743	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1744	return a;
				1745	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1746	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	1747	if (STATUS(flush_to_zero)) {
				1748	if (aSig \| bSig) {
				1749	float_raise(float_flag_output_denormal STATUS_VAR);
				1750	}
				1751	return packFloat32(zSign, 0, 0);
				1752	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1753	return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1754	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1755	zSig = 0x40000000 + aSig + bSig;
				1756	zExp = aExp;
				1757	goto roundAndPack;
				1758	}
				1759	aSig \|= 0x20000000;
				1760	zSig = ( aSig + bSig )<<1;
				1761	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1762	if ( (int32_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1763	zSig = aSig + bSig;
				1764	++zExp;
				1765	}
				1766	roundAndPack:
				1767	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1768
				1769	}
				1770
				1771	/*----------------------------------------------------------------------------
				1772	\| Returns the result of subtracting the absolute values of the single-
				1773	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1774	\| difference is negated before being returned. `zSign' is ignored if the
				1775	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1776	\| Standard for Binary Floating-Point Arithmetic.
				1777	----------------------------------------------------------------------------/
				1778
				1779	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1780	{
				1781	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1782	uint32_t aSig, bSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1783	int16 expDiff;
				1784
				1785	aSig = extractFloat32Frac( a );
				1786	aExp = extractFloat32Exp( a );
				1787	bSig = extractFloat32Frac( b );
				1788	bExp = extractFloat32Exp( b );
				1789	expDiff = aExp - bExp;
				1790	aSig <<= 7;
				1791	bSig <<= 7;
				1792	if ( 0 < expDiff ) goto aExpBigger;
				1793	if ( expDiff < 0 ) goto bExpBigger;
				1794	if ( aExp == 0xFF ) {
				1795	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1796	float_raise( float_flag_invalid STATUS_VAR);
				1797	return float32_default_nan;
				1798	}
				1799	if ( aExp == 0 ) {
				1800	aExp = 1;
				1801	bExp = 1;
				1802	}
				1803	if ( bSig < aSig ) goto aBigger;
				1804	if ( aSig < bSig ) goto bBigger;
				1805	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1806	bExpBigger:
				1807	if ( bExp == 0xFF ) {
				1808	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1809	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1810	}
				1811	if ( aExp == 0 ) {
				1812	++expDiff;
				1813	}
				1814	else {
				1815	aSig \|= 0x40000000;
				1816	}
				1817	shift32RightJamming( aSig, - expDiff, &aSig );
				1818	bSig \|= 0x40000000;
				1819	bBigger:
				1820	zSig = bSig - aSig;
				1821	zExp = bExp;
				1822	zSign ^= 1;
				1823	goto normalizeRoundAndPack;
				1824	aExpBigger:
				1825	if ( aExp == 0xFF ) {
				1826	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1827	return a;
				1828	}
				1829	if ( bExp == 0 ) {
				1830	--expDiff;
				1831	}
				1832	else {
				1833	bSig \|= 0x40000000;
				1834	}
				1835	shift32RightJamming( bSig, expDiff, &bSig );
				1836	aSig \|= 0x40000000;
				1837	aBigger:
				1838	zSig = aSig - bSig;
				1839	zExp = aExp;
				1840	normalizeRoundAndPack:
				1841	--zExp;
				1842	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1843
				1844	}
				1845
				1846	/*----------------------------------------------------------------------------
				1847	\| Returns the result of adding the single-precision floating-point values `a'
				1848	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				1849	\| Binary Floating-Point Arithmetic.
				1850	----------------------------------------------------------------------------/
				1851
				1852	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				1853	{
				1854	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1855	a = float32_squash_input_denormal(a STATUS_VAR);
				1856	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1857
				1858	aSign = extractFloat32Sign( a );
				1859	bSign = extractFloat32Sign( b );
				1860	if ( aSign == bSign ) {
				1861	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				1862	}
				1863	else {
				1864	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1865	}
				1866
				1867	}
				1868
				1869	/*----------------------------------------------------------------------------
				1870	\| Returns the result of subtracting the single-precision floating-point values
				1871	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1872	\| for Binary Floating-Point Arithmetic.
				1873	----------------------------------------------------------------------------/
				1874
				1875	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				1876	{
				1877	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1878	a = float32_squash_input_denormal(a STATUS_VAR);
				1879	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1880
				1881	aSign = extractFloat32Sign( a );
				1882	bSign = extractFloat32Sign( b );
				1883	if ( aSign == bSign ) {
				1884	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1885	}
				1886	else {
				1887	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				1888	}
				1889
				1890	}
				1891
				1892	/*----------------------------------------------------------------------------
				1893	\| Returns the result of multiplying the single-precision floating-point values
				1894	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1895	\| for Binary Floating-Point Arithmetic.
				1896	----------------------------------------------------------------------------/
				1897
				1898	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				1899	{
				1900	flag aSign, bSign, zSign;
				1901	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1902	uint32_t aSig, bSig;
				1903	uint64_t zSig64;
				1904	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1905
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1906	a = float32_squash_input_denormal(a STATUS_VAR);
				1907	b = float32_squash_input_denormal(b STATUS_VAR);
				1908
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1909	aSig = extractFloat32Frac( a );
				1910	aExp = extractFloat32Exp( a );
				1911	aSign = extractFloat32Sign( a );
				1912	bSig = extractFloat32Frac( b );
				1913	bExp = extractFloat32Exp( b );
				1914	bSign = extractFloat32Sign( b );
				1915	zSign = aSign ^ bSign;
				1916	if ( aExp == 0xFF ) {
				1917	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1918	return propagateFloat32NaN( a, b STATUS_VAR );
				1919	}
				1920	if ( ( bExp \| bSig ) == 0 ) {
				1921	float_raise( float_flag_invalid STATUS_VAR);
				1922	return float32_default_nan;
				1923	}
				1924	return packFloat32( zSign, 0xFF, 0 );
				1925	}
				1926	if ( bExp == 0xFF ) {
				1927	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1928	if ( ( aExp \| aSig ) == 0 ) {
				1929	float_raise( float_flag_invalid STATUS_VAR);
				1930	return float32_default_nan;
				1931	}
				1932	return packFloat32( zSign, 0xFF, 0 );
				1933	}
				1934	if ( aExp == 0 ) {
				1935	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1936	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1937	}
				1938	if ( bExp == 0 ) {
				1939	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1940	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1941	}
				1942	zExp = aExp + bExp - 0x7F;
				1943	aSig = ( aSig \| 0x00800000 )<<7;
				1944	bSig = ( bSig \| 0x00800000 )<<8;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1945	shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1946	zSig = zSig64;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1947	if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1948	zSig <<= 1;
				1949	--zExp;
				1950	}
				1951	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1952
				1953	}
				1954
				1955	/*----------------------------------------------------------------------------
				1956	\| Returns the result of dividing the single-precision floating-point value `a'
				1957	\| by the corresponding value `b'. The operation is performed according to the
				1958	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1959	----------------------------------------------------------------------------/
				1960
				1961	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				1962	{
				1963	flag aSign, bSign, zSign;
				1964	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	1965	uint32_t aSig, bSig, zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	1966	a = float32_squash_input_denormal(a STATUS_VAR);
				1967	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1968
				1969	aSig = extractFloat32Frac( a );
				1970	aExp = extractFloat32Exp( a );
				1971	aSign = extractFloat32Sign( a );
				1972	bSig = extractFloat32Frac( b );
				1973	bExp = extractFloat32Exp( b );
				1974	bSign = extractFloat32Sign( b );
				1975	zSign = aSign ^ bSign;
				1976	if ( aExp == 0xFF ) {
				1977	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1978	if ( bExp == 0xFF ) {
				1979	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1980	float_raise( float_flag_invalid STATUS_VAR);
				1981	return float32_default_nan;
				1982	}
				1983	return packFloat32( zSign, 0xFF, 0 );
				1984	}
				1985	if ( bExp == 0xFF ) {
				1986	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1987	return packFloat32( zSign, 0, 0 );
				1988	}
				1989	if ( bExp == 0 ) {
				1990	if ( bSig == 0 ) {
				1991	if ( ( aExp \| aSig ) == 0 ) {
				1992	float_raise( float_flag_invalid STATUS_VAR);
				1993	return float32_default_nan;
				1994	}
				1995	float_raise( float_flag_divbyzero STATUS_VAR);
				1996	return packFloat32( zSign, 0xFF, 0 );
				1997	}
				1998	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1999	}
				2000	if ( aExp == 0 ) {
				2001	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				2002	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2003	}
				2004	zExp = aExp - bExp + 0x7D;
				2005	aSig = ( aSig \| 0x00800000 )<<7;
				2006	bSig = ( bSig \| 0x00800000 )<<8;
				2007	if ( bSig <= ( aSig + aSig ) ) {
				2008	aSig >>= 1;
				2009	++zExp;
				2010	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2011	zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2012	if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2013	zSig \|= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2014	}
				2015	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				2016
				2017	}
				2018
				2019	/*----------------------------------------------------------------------------
				2020	\| Returns the remainder of the single-precision floating-point value `a'
				2021	\| with respect to the corresponding value `b'. The operation is performed
				2022	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2023	----------------------------------------------------------------------------/
				2024
				2025	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				2026	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	2027	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2028	int16 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2029	uint32_t aSig, bSig;
				2030	uint32_t q;
				2031	uint64_t aSig64, bSig64, q64;
				2032	uint32_t alternateASig;
				2033	int32_t sigMean;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2034	a = float32_squash_input_denormal(a STATUS_VAR);
				2035	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2036
				2037	aSig = extractFloat32Frac( a );
				2038	aExp = extractFloat32Exp( a );
				2039	aSign = extractFloat32Sign( a );
				2040	bSig = extractFloat32Frac( b );
				2041	bExp = extractFloat32Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2042	if ( aExp == 0xFF ) {
				2043	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				2044	return propagateFloat32NaN( a, b STATUS_VAR );
				2045	}
				2046	float_raise( float_flag_invalid STATUS_VAR);
				2047	return float32_default_nan;
				2048	}
				2049	if ( bExp == 0xFF ) {
				2050	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				2051	return a;
				2052	}
				2053	if ( bExp == 0 ) {
				2054	if ( bSig == 0 ) {
				2055	float_raise( float_flag_invalid STATUS_VAR);
				2056	return float32_default_nan;
				2057	}
				2058	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				2059	}
				2060	if ( aExp == 0 ) {
				2061	if ( aSig == 0 ) return a;
				2062	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2063	}
				2064	expDiff = aExp - bExp;
				2065	aSig \|= 0x00800000;
				2066	bSig \|= 0x00800000;
				2067	if ( expDiff < 32 ) {
				2068	aSig <<= 8;
				2069	bSig <<= 8;
				2070	if ( expDiff < 0 ) {
				2071	if ( expDiff < -1 ) return a;
				2072	aSig >>= 1;
				2073	}
				2074	q = ( bSig <= aSig );
				2075	if ( q ) aSig -= bSig;
				2076	if ( 0 < expDiff ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2077	q = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2078	q >>= 32 - expDiff;
				2079	bSig >>= 2;
				2080	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2081	}
				2082	else {
				2083	aSig >>= 2;
				2084	bSig >>= 2;
				2085	}
				2086	}
				2087	else {
				2088	if ( bSig <= aSig ) aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2089	aSig64 = ( (uint64_t) aSig )<<40;
				2090	bSig64 = ( (uint64_t) bSig )<<40;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2091	expDiff -= 64;
				2092	while ( 0 < expDiff ) {
				2093	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2094	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2095	aSig64 = - ( ( bSig * q64 )<<38 );
				2096	expDiff -= 62;
				2097	}
				2098	expDiff += 64;
				2099	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				2100	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				2101	q = q64>>( 64 - expDiff );
				2102	bSig <<= 6;
				2103	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				2104	}
				2105	do {
				2106	alternateASig = aSig;
				2107	++q;
				2108	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2109	} while ( 0 <= (int32_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2110	sigMean = aSig + alternateASig;
				2111	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2112	aSig = alternateASig;
				2113	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2114	zSign = ( (int32_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2115	if ( zSign ) aSig = - aSig;
				2116	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2117
				2118	}
				2119
				2120	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	2121	\| Returns the result of multiplying the single-precision floating-point values
				2122	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				2123	\| multiplication. The operation is performed according to the IEC/IEEE
				2124	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				2125	\| The flags argument allows the caller to select negation of the
				2126	\| addend, the intermediate product, or the final result. (The difference
				2127	\| between this and having the caller do a separate negation is that negating
				2128	\| externally will flip the sign bit on NaNs.)
				2129	----------------------------------------------------------------------------/
				2130
				2131	float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
				2132	{
				2133	flag aSign, bSign, cSign, zSign;
				2134	int aExp, bExp, cExp, pExp, zExp, expDiff;
				2135	uint32_t aSig, bSig, cSig;
				2136	flag pInf, pZero, pSign;
				2137	uint64_t pSig64, cSig64, zSig64;
				2138	uint32_t pSig;
				2139	int shiftcount;
				2140	flag signflip, infzero;
				2141
				2142	a = float32_squash_input_denormal(a STATUS_VAR);
				2143	b = float32_squash_input_denormal(b STATUS_VAR);
				2144	c = float32_squash_input_denormal(c STATUS_VAR);
				2145	aSig = extractFloat32Frac(a);
				2146	aExp = extractFloat32Exp(a);
				2147	aSign = extractFloat32Sign(a);
				2148	bSig = extractFloat32Frac(b);
				2149	bExp = extractFloat32Exp(b);
				2150	bSign = extractFloat32Sign(b);
				2151	cSig = extractFloat32Frac(c);
				2152	cExp = extractFloat32Exp(c);
				2153	cSign = extractFloat32Sign(c);
				2154
				2155	infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) \|\|
				2156	(aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
				2157
				2158	/* It is implementation-defined whether the cases of (0,inf,qnan)
				2159	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				2160	* they return if they do), so we have to hand this information
				2161	* off to the target-specific pick-a-NaN routine.
				2162	*/
				2163	if (((aExp == 0xff) && aSig) \|\|
				2164	((bExp == 0xff) && bSig) \|\|
				2165	((cExp == 0xff) && cSig)) {
				2166	return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
				2167	}
				2168
				2169	if (infzero) {
				2170	float_raise(float_flag_invalid STATUS_VAR);
				2171	return float32_default_nan;
				2172	}
				2173
				2174	if (flags & float_muladd_negate_c) {
				2175	cSign ^= 1;
				2176	}
				2177
				2178	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				2179
				2180	/* Work out the sign and type of the product */
				2181	pSign = aSign ^ bSign;
				2182	if (flags & float_muladd_negate_product) {
				2183	pSign ^= 1;
				2184	}
				2185	pInf = (aExp == 0xff) \|\| (bExp == 0xff);
				2186	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				2187
				2188	if (cExp == 0xff) {
				2189	if (pInf && (pSign ^ cSign)) {
				2190	/* addition of opposite-signed infinities => InvalidOperation */
				2191	float_raise(float_flag_invalid STATUS_VAR);
				2192	return float32_default_nan;
				2193	}
				2194	/* Otherwise generate an infinity of the same sign */
				2195	return packFloat32(cSign ^ signflip, 0xff, 0);
				2196	}
				2197
				2198	if (pInf) {
				2199	return packFloat32(pSign ^ signflip, 0xff, 0);
				2200	}
				2201
				2202	if (pZero) {
				2203	if (cExp == 0) {
				2204	if (cSig == 0) {
				2205	/* Adding two exact zeroes */
				2206	if (pSign == cSign) {
				2207	zSign = pSign;
				2208	} else if (STATUS(float_rounding_mode) == float_round_down) {
				2209	zSign = 1;
				2210	} else {
				2211	zSign = 0;
				2212	}
				2213	return packFloat32(zSign ^ signflip, 0, 0);
				2214	}
				2215	/* Exact zero plus a denorm */
				2216	if (STATUS(flush_to_zero)) {
				2217	float_raise(float_flag_output_denormal STATUS_VAR);
				2218	return packFloat32(cSign ^ signflip, 0, 0);
				2219	}
				2220	}
				2221	/* Zero plus something non-zero : just return the something */
				2222	return c ^ (signflip << 31);
				2223	}
				2224
				2225	if (aExp == 0) {
				2226	normalizeFloat32Subnormal(aSig, &aExp, &aSig);
				2227	}
				2228	if (bExp == 0) {
				2229	normalizeFloat32Subnormal(bSig, &bExp, &bSig);
				2230	}
				2231
				2232	/* Calculate the actual result a * b + c */
				2233
				2234	/* Multiply first; this is easy. */
				2235	/* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
				2236	* because we want the true exponent, not the "one-less-than"
				2237	* flavour that roundAndPackFloat32() takes.
				2238	*/
				2239	pExp = aExp + bExp - 0x7e;
				2240	aSig = (aSig \| 0x00800000) << 7;
				2241	bSig = (bSig \| 0x00800000) << 8;
				2242	pSig64 = (uint64_t)aSig * bSig;
				2243	if ((int64_t)(pSig64 << 1) >= 0) {
				2244	pSig64 <<= 1;
				2245	pExp--;
				2246	}
				2247
				2248	zSign = pSign ^ signflip;
				2249
				2250	/* Now pSig64 is the significand of the multiply, with the explicit bit in
				2251	* position 62.
				2252	*/
				2253	if (cExp == 0) {
				2254	if (!cSig) {
				2255	/* Throw out the special case of c being an exact zero now */
				2256	shift64RightJamming(pSig64, 32, &pSig64);
				2257	pSig = pSig64;
				2258	return roundAndPackFloat32(zSign, pExp - 1,
				2259	pSig STATUS_VAR);
				2260	}
				2261	normalizeFloat32Subnormal(cSig, &cExp, &cSig);
				2262	}
				2263
				2264	cSig64 = (uint64_t)cSig << (62 - 23);
				2265	cSig64 \|= LIT64(0x4000000000000000);
				2266	expDiff = pExp - cExp;
				2267
				2268	if (pSign == cSign) {
				2269	/* Addition */
				2270	if (expDiff > 0) {
				2271	/* scale c to match p */
				2272	shift64RightJamming(cSig64, expDiff, &cSig64);
				2273	zExp = pExp;
				2274	} else if (expDiff < 0) {
				2275	/* scale p to match c */
				2276	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2277	zExp = cExp;
				2278	} else {
				2279	/* no scaling needed */
				2280	zExp = cExp;
				2281	}
				2282	/* Add significands and make sure explicit bit ends up in posn 62 */
				2283	zSig64 = pSig64 + cSig64;
				2284	if ((int64_t)zSig64 < 0) {
				2285	shift64RightJamming(zSig64, 1, &zSig64);
				2286	} else {
				2287	zExp--;
				2288	}
				2289	} else {
				2290	/* Subtraction */
				2291	if (expDiff > 0) {
				2292	shift64RightJamming(cSig64, expDiff, &cSig64);
				2293	zSig64 = pSig64 - cSig64;
				2294	zExp = pExp;
				2295	} else if (expDiff < 0) {
				2296	shift64RightJamming(pSig64, -expDiff, &pSig64);
				2297	zSig64 = cSig64 - pSig64;
				2298	zExp = cExp;
				2299	zSign ^= 1;
				2300	} else {
				2301	zExp = pExp;
				2302	if (cSig64 < pSig64) {
				2303	zSig64 = pSig64 - cSig64;
				2304	} else if (pSig64 < cSig64) {
				2305	zSig64 = cSig64 - pSig64;
				2306	zSign ^= 1;
				2307	} else {
				2308	/* Exact zero */
				2309	zSign = signflip;
				2310	if (STATUS(float_rounding_mode) == float_round_down) {
				2311	zSign ^= 1;
				2312	}
				2313	return packFloat32(zSign, 0, 0);
				2314	}
				2315	}
				2316	--zExp;
				2317	/* Normalize to put the explicit bit back into bit 62. */
				2318	shiftcount = countLeadingZeros64(zSig64) - 1;
				2319	zSig64 <<= shiftcount;
				2320	zExp -= shiftcount;
				2321	}
				2322	shift64RightJamming(zSig64, 32, &zSig64);
				2323	return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
				2324	}
				2325
				2326
				2327	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2328	\| Returns the square root of the single-precision floating-point value `a'.
				2329	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2330	\| Floating-Point Arithmetic.
				2331	----------------------------------------------------------------------------/
				2332
				2333	float32 float32_sqrt( float32 a STATUS_PARAM )
				2334	{
				2335	flag aSign;
				2336	int16 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2337	uint32_t aSig, zSig;
				2338	uint64_t rem, term;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2339	a = float32_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2340
				2341	aSig = extractFloat32Frac( a );
				2342	aExp = extractFloat32Exp( a );
				2343	aSign = extractFloat32Sign( a );
				2344	if ( aExp == 0xFF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2345	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2346	if ( ! aSign ) return a;
				2347	float_raise( float_flag_invalid STATUS_VAR);
				2348	return float32_default_nan;
				2349	}
				2350	if ( aSign ) {
				2351	if ( ( aExp \| aSig ) == 0 ) return a;
				2352	float_raise( float_flag_invalid STATUS_VAR);
				2353	return float32_default_nan;
				2354	}
				2355	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2356	if ( aSig == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2357	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2358	}
				2359	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2360	aSig = ( aSig \| 0x00800000 )<<8;
				2361	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2362	if ( ( zSig & 0x7F ) <= 5 ) {
				2363	if ( zSig < 2 ) {
				2364	zSig = 0x7FFFFFFF;
				2365	goto roundAndPack;
				2366	}
				2367	aSig >>= aExp & 1;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2368	term = ( (uint64_t) zSig ) * zSig;
				2369	rem = ( ( (uint64_t) aSig )<<32 ) - term;
				2370	while ( (int64_t) rem < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2371	--zSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2372	rem += ( ( (uint64_t) zSig )<<1 ) \| 1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2373	}
				2374	zSig \|= ( rem != 0 );
				2375	}
				2376	shift32RightJamming( zSig, 1, &zSig );
				2377	roundAndPack:
				2378	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2379
				2380	}
				2381
				2382	/*----------------------------------------------------------------------------
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2383	\| Returns the binary exponential of the single-precision floating-point value
				2384	\| `a'. The operation is performed according to the IEC/IEEE Standard for
				2385	\| Binary Floating-Point Arithmetic.
				2386	\|
				2387	\| Uses the following identities:
				2388	\|
				2389	\| 1. -------------------------------------------------------------------------
				2390	\| x x*ln(2)
				2391	\| 2 = e
				2392	\|
				2393	\| 2. -------------------------------------------------------------------------
				2394	\| 2 3 4 5 n
				2395	\| x x x x x x x
				2396	\| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
				2397	\| 1! 2! 3! 4! 5! n!
				2398	----------------------------------------------------------------------------/
				2399
				2400	static const float64 float32_exp2_coefficients[15] =
				2401	{
Peter Maydell	d5138cf	2011-02-10 13:59:34 +0000	[diff] [blame]	2402	const_float64( 0x3ff0000000000000ll ), /* 1 */
				2403	const_float64( 0x3fe0000000000000ll ), /* 2 */
				2404	const_float64( 0x3fc5555555555555ll ), /* 3 */
				2405	const_float64( 0x3fa5555555555555ll ), /* 4 */
				2406	const_float64( 0x3f81111111111111ll ), /* 5 */
				2407	const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
				2408	const_float64( 0x3f2a01a01a01a01all ), /* 7 */
				2409	const_float64( 0x3efa01a01a01a01all ), /* 8 */
				2410	const_float64( 0x3ec71de3a556c734ll ), /* 9 */
				2411	const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
				2412	const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
				2413	const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
				2414	const_float64( 0x3de6124613a86d09ll ), /* 13 */
				2415	const_float64( 0x3da93974a8c07c9dll ), /* 14 */
				2416	const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2417	};
				2418
				2419	float32 float32_exp2( float32 a STATUS_PARAM )
				2420	{
				2421	flag aSign;
				2422	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2423	uint32_t aSig;
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2424	float64 r, x, xn;
				2425	int i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2426	a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno	8229c99	2009-02-05 12:04:05 +0100	[diff] [blame]	2427
				2428	aSig = extractFloat32Frac( a );
				2429	aExp = extractFloat32Exp( a );
				2430	aSign = extractFloat32Sign( a );
				2431
				2432	if ( aExp == 0xFF) {
				2433	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2434	return (aSign) ? float32_zero : a;
				2435	}
				2436	if (aExp == 0) {
				2437	if (aSig == 0) return float32_one;
				2438	}
				2439
				2440	float_raise( float_flag_inexact STATUS_VAR);
				2441
				2442	/* ******************************* */
				2443	/* using float64 for approximation */
				2444	/* ******************************* */
				2445	x = float32_to_float64(a STATUS_VAR);
				2446	x = float64_mul(x, float64_ln2 STATUS_VAR);
				2447
				2448	xn = x;
				2449	r = float64_one;
				2450	for (i = 0 ; i < 15 ; i++) {
				2451	float64 f;
				2452
				2453	f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
				2454	r = float64_add(r, f STATUS_VAR);
				2455
				2456	xn = float64_mul(xn, x STATUS_VAR);
				2457	}
				2458
				2459	return float64_to_float32(r, status);
				2460	}
				2461
				2462	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2463	\| Returns the binary log of the single-precision floating-point value `a'.
				2464	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2465	\| Floating-Point Arithmetic.
				2466	----------------------------------------------------------------------------/
				2467	float32 float32_log2( float32 a STATUS_PARAM )
				2468	{
				2469	flag aSign, zSign;
				2470	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2471	uint32_t aSig, zSig, i;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2472
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2473	a = float32_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2474	aSig = extractFloat32Frac( a );
				2475	aExp = extractFloat32Exp( a );
				2476	aSign = extractFloat32Sign( a );
				2477
				2478	if ( aExp == 0 ) {
				2479	if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
				2480	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2481	}
				2482	if ( aSign ) {
				2483	float_raise( float_flag_invalid STATUS_VAR);
				2484	return float32_default_nan;
				2485	}
				2486	if ( aExp == 0xFF ) {
				2487	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2488	return a;
				2489	}
				2490
				2491	aExp -= 0x7F;
				2492	aSig \|= 0x00800000;
				2493	zSign = aExp < 0;
				2494	zSig = aExp << 23;
				2495
				2496	for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2497	aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2498	if ( aSig & 0x01000000 ) {
				2499	aSig >>= 1;
				2500	zSig \|= i;
				2501	}
				2502	}
				2503
				2504	if ( zSign )
				2505	zSig = -zSig;
				2506
				2507	return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
				2508	}
				2509
				2510	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2511	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2512	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2513	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2514	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2515	----------------------------------------------------------------------------/
				2516
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2517	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2518	{
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2519	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2520	a = float32_squash_input_denormal(a STATUS_VAR);
				2521	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2522
				2523	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2524	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2525	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2526	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2527	return 0;
				2528	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2529	av = float32_val(a);
				2530	bv = float32_val(b);
				2531	return ( av == bv ) \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2532	}
				2533
				2534	/*----------------------------------------------------------------------------
				2535	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2536	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				2537	\| exception is raised if either operand is a NaN. The comparison is performed
				2538	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2539	----------------------------------------------------------------------------/
				2540
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2541	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2542	{
				2543	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2544	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2545	a = float32_squash_input_denormal(a STATUS_VAR);
				2546	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2547
				2548	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2549	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2550	) {
				2551	float_raise( float_flag_invalid STATUS_VAR);
				2552	return 0;
				2553	}
				2554	aSign = extractFloat32Sign( a );
				2555	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2556	av = float32_val(a);
				2557	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2558	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2559	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2560
				2561	}
				2562
				2563	/*----------------------------------------------------------------------------
				2564	\| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2565	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2566	\| raised if either operand is a NaN. The comparison is performed according
				2567	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2568	----------------------------------------------------------------------------/
				2569
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2570	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2571	{
				2572	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2573	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2574	a = float32_squash_input_denormal(a STATUS_VAR);
				2575	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2576
				2577	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2578	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2579	) {
				2580	float_raise( float_flag_invalid STATUS_VAR);
				2581	return 0;
				2582	}
				2583	aSign = extractFloat32Sign( a );
				2584	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2585	av = float32_val(a);
				2586	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2587	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2588	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2589
				2590	}
				2591
				2592	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2593	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2594	\| be compared, and 0 otherwise. The invalid exception is raised if either
				2595	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				2596	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2597	----------------------------------------------------------------------------/
				2598
				2599	int float32_unordered( float32 a, float32 b STATUS_PARAM )
				2600	{
				2601	a = float32_squash_input_denormal(a STATUS_VAR);
				2602	b = float32_squash_input_denormal(b STATUS_VAR);
				2603
				2604	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2605	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2606	) {
				2607	float_raise( float_flag_invalid STATUS_VAR);
				2608	return 1;
				2609	}
				2610	return 0;
				2611	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2612
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2613	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2614	\| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	2615	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2616	\| exception. The comparison is performed according to the IEC/IEEE Standard
				2617	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2618	----------------------------------------------------------------------------/
				2619
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2620	int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2621	{
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2622	a = float32_squash_input_denormal(a STATUS_VAR);
				2623	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2624
				2625	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2626	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2627	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2628	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2629	float_raise( float_flag_invalid STATUS_VAR);
				2630	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2631	return 0;
				2632	}
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	2633	return ( float32_val(a) == float32_val(b) ) \|\|
				2634	( (uint32_t) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2635	}
				2636
				2637	/*----------------------------------------------------------------------------
				2638	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2639	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2640	\| cause an exception. Otherwise, the comparison is performed according to the
				2641	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2642	----------------------------------------------------------------------------/
				2643
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2644	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2645	{
				2646	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2647	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2648	a = float32_squash_input_denormal(a STATUS_VAR);
				2649	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2650
				2651	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2652	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2653	) {
				2654	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2655	float_raise( float_flag_invalid STATUS_VAR);
				2656	}
				2657	return 0;
				2658	}
				2659	aSign = extractFloat32Sign( a );
				2660	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2661	av = float32_val(a);
				2662	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2663	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2664	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2665
				2666	}
				2667
				2668	/*----------------------------------------------------------------------------
				2669	\| Returns 1 if the single-precision floating-point value `a' is less than
				2670	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2671	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2672	\| Standard for Binary Floating-Point Arithmetic.
				2673	----------------------------------------------------------------------------/
				2674
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2675	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2676	{
				2677	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2678	uint32_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2679	a = float32_squash_input_denormal(a STATUS_VAR);
				2680	b = float32_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2681
				2682	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2683	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2684	) {
				2685	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2686	float_raise( float_flag_invalid STATUS_VAR);
				2687	}
				2688	return 0;
				2689	}
				2690	aSign = extractFloat32Sign( a );
				2691	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2692	av = float32_val(a);
				2693	bv = float32_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2694	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2695	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2696
				2697	}
				2698
				2699	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	2700	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
				2701	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				2702	\| comparison is performed according to the IEC/IEEE Standard for Binary
				2703	\| Floating-Point Arithmetic.
				2704	----------------------------------------------------------------------------/
				2705
				2706	int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
				2707	{
				2708	a = float32_squash_input_denormal(a STATUS_VAR);
				2709	b = float32_squash_input_denormal(b STATUS_VAR);
				2710
				2711	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2712	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2713	) {
				2714	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2715	float_raise( float_flag_invalid STATUS_VAR);
				2716	}
				2717	return 1;
				2718	}
				2719	return 0;
				2720	}
				2721
				2722	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2723	\| Returns the result of converting the double-precision floating-point value
				2724	\| `a' to the 32-bit two's complement integer format. The conversion is
				2725	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2726	\| Arithmetic---which means in particular that the conversion is rounded
				2727	\| according to the current rounding mode. If `a' is a NaN, the largest
				2728	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2729	\| largest integer with the same sign as `a' is returned.
				2730	----------------------------------------------------------------------------/
				2731
				2732	int32 float64_to_int32( float64 a STATUS_PARAM )
				2733	{
				2734	flag aSign;
				2735	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2736	uint64_t aSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2737	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2738
				2739	aSig = extractFloat64Frac( a );
				2740	aExp = extractFloat64Exp( a );
				2741	aSign = extractFloat64Sign( a );
				2742	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2743	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2744	shiftCount = 0x42C - aExp;
				2745	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2746	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2747
				2748	}
				2749
				2750	/*----------------------------------------------------------------------------
				2751	\| Returns the result of converting the double-precision floating-point value
				2752	\| `a' to the 32-bit two's complement integer format. The conversion is
				2753	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2754	\| Arithmetic, except that the conversion is always rounded toward zero.
				2755	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2756	\| the conversion overflows, the largest integer with the same sign as `a' is
				2757	\| returned.
				2758	----------------------------------------------------------------------------/
				2759
				2760	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2761	{
				2762	flag aSign;
				2763	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2764	uint64_t aSig, savedASig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2765	int32 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2766	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2767
				2768	aSig = extractFloat64Frac( a );
				2769	aExp = extractFloat64Exp( a );
				2770	aSign = extractFloat64Sign( a );
				2771	if ( 0x41E < aExp ) {
				2772	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2773	goto invalid;
				2774	}
				2775	else if ( aExp < 0x3FF ) {
				2776	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2777	return 0;
				2778	}
				2779	aSig \|= LIT64( 0x0010000000000000 );
				2780	shiftCount = 0x433 - aExp;
				2781	savedASig = aSig;
				2782	aSig >>= shiftCount;
				2783	z = aSig;
				2784	if ( aSign ) z = - z;
				2785	if ( ( z < 0 ) ^ aSign ) {
				2786	invalid:
				2787	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2788	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2789	}
				2790	if ( ( aSig<<shiftCount ) != savedASig ) {
				2791	STATUS(float_exception_flags) \|= float_flag_inexact;
				2792	}
				2793	return z;
				2794
				2795	}
				2796
				2797	/*----------------------------------------------------------------------------
				2798	\| Returns the result of converting the double-precision floating-point value
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2799	\| `a' to the 16-bit two's complement integer format. The conversion is
				2800	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2801	\| Arithmetic, except that the conversion is always rounded toward zero.
				2802	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2803	\| the conversion overflows, the largest integer with the same sign as `a' is
				2804	\| returned.
				2805	----------------------------------------------------------------------------/
				2806
				2807	int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
				2808	{
				2809	flag aSign;
				2810	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2811	uint64_t aSig, savedASig;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2812	int32 z;
				2813
				2814	aSig = extractFloat64Frac( a );
				2815	aExp = extractFloat64Exp( a );
				2816	aSign = extractFloat64Sign( a );
				2817	if ( 0x40E < aExp ) {
				2818	if ( ( aExp == 0x7FF ) && aSig ) {
				2819	aSign = 0;
				2820	}
				2821	goto invalid;
				2822	}
				2823	else if ( aExp < 0x3FF ) {
				2824	if ( aExp \|\| aSig ) {
				2825	STATUS(float_exception_flags) \|= float_flag_inexact;
				2826	}
				2827	return 0;
				2828	}
				2829	aSig \|= LIT64( 0x0010000000000000 );
				2830	shiftCount = 0x433 - aExp;
				2831	savedASig = aSig;
				2832	aSig >>= shiftCount;
				2833	z = aSig;
				2834	if ( aSign ) {
				2835	z = - z;
				2836	}
				2837	if ( ( (int16_t)z < 0 ) ^ aSign ) {
				2838	invalid:
				2839	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2840	return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	2841	}
				2842	if ( ( aSig<<shiftCount ) != savedASig ) {
				2843	STATUS(float_exception_flags) \|= float_flag_inexact;
				2844	}
				2845	return z;
				2846	}
				2847
				2848	/*----------------------------------------------------------------------------
				2849	\| Returns the result of converting the double-precision floating-point value
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2850	\| `a' to the 64-bit two's complement integer format. The conversion is
				2851	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2852	\| Arithmetic---which means in particular that the conversion is rounded
				2853	\| according to the current rounding mode. If `a' is a NaN, the largest
				2854	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2855	\| largest integer with the same sign as `a' is returned.
				2856	----------------------------------------------------------------------------/
				2857
				2858	int64 float64_to_int64( float64 a STATUS_PARAM )
				2859	{
				2860	flag aSign;
				2861	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2862	uint64_t aSig, aSigExtra;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2863	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2864
				2865	aSig = extractFloat64Frac( a );
				2866	aExp = extractFloat64Exp( a );
				2867	aSign = extractFloat64Sign( a );
				2868	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2869	shiftCount = 0x433 - aExp;
				2870	if ( shiftCount <= 0 ) {
				2871	if ( 0x43E < aExp ) {
				2872	float_raise( float_flag_invalid STATUS_VAR);
				2873	if ( ! aSign
				2874	\|\| ( ( aExp == 0x7FF )
				2875	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2876	) {
				2877	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2878	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2879	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2880	}
				2881	aSigExtra = 0;
				2882	aSig <<= - shiftCount;
				2883	}
				2884	else {
				2885	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				2886	}
				2887	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				2888
				2889	}
				2890
				2891	/*----------------------------------------------------------------------------
				2892	\| Returns the result of converting the double-precision floating-point value
				2893	\| `a' to the 64-bit two's complement integer format. The conversion is
				2894	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2895	\| Arithmetic, except that the conversion is always rounded toward zero.
				2896	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2897	\| the conversion overflows, the largest integer with the same sign as `a' is
				2898	\| returned.
				2899	----------------------------------------------------------------------------/
				2900
				2901	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				2902	{
				2903	flag aSign;
				2904	int16 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2905	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2906	int64 z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2907	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2908
				2909	aSig = extractFloat64Frac( a );
				2910	aExp = extractFloat64Exp( a );
				2911	aSign = extractFloat64Sign( a );
				2912	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2913	shiftCount = aExp - 0x433;
				2914	if ( 0 <= shiftCount ) {
				2915	if ( 0x43E <= aExp ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2916	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2917	float_raise( float_flag_invalid STATUS_VAR);
				2918	if ( ! aSign
				2919	\|\| ( ( aExp == 0x7FF )
				2920	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2921	) {
				2922	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2923	}
				2924	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2925	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2926	}
				2927	z = aSig<<shiftCount;
				2928	}
				2929	else {
				2930	if ( aExp < 0x3FE ) {
				2931	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2932	return 0;
				2933	}
				2934	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2935	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2936	STATUS(float_exception_flags) \|= float_flag_inexact;
				2937	}
				2938	}
				2939	if ( aSign ) z = - z;
				2940	return z;
				2941
				2942	}
				2943
				2944	/*----------------------------------------------------------------------------
				2945	\| Returns the result of converting the double-precision floating-point value
				2946	\| `a' to the single-precision floating-point format. The conversion is
				2947	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2948	\| Arithmetic.
				2949	----------------------------------------------------------------------------/
				2950
				2951	float32 float64_to_float32( float64 a STATUS_PARAM )
				2952	{
				2953	flag aSign;
				2954	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2955	uint64_t aSig;
				2956	uint32_t zSig;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	2957	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2958
				2959	aSig = extractFloat64Frac( a );
				2960	aExp = extractFloat64Exp( a );
				2961	aSign = extractFloat64Sign( a );
				2962	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	2963	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2964	return packFloat32( aSign, 0xFF, 0 );
				2965	}
				2966	shift64RightJamming( aSig, 22, &aSig );
				2967	zSig = aSig;
				2968	if ( aExp \|\| zSig ) {
				2969	zSig \|= 0x40000000;
				2970	aExp -= 0x381;
				2971	}
				2972	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				2973
				2974	}
				2975
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	2976
				2977	/*----------------------------------------------------------------------------
				2978	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				2979	\| half-precision floating-point value, returning the result. After being
				2980	\| shifted into the proper positions, the three fields are simply added
				2981	\| together to form the result. This means that any integer portion of `zSig'
				2982	\| will be added into the exponent. Since a properly normalized significand
				2983	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				2984	\| than the desired result exponent whenever `zSig' is a complete, normalized
				2985	\| significand.
				2986	----------------------------------------------------------------------------/
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2987	static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	2988	{
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	2989	return make_float16(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	2990	(((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	2991	}
				2992
				2993	/* Half precision floats come in two formats: standard IEEE and "ARM" format.
				2994	The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	2995
				2996	float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	2997	{
				2998	flag aSign;
				2999	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3000	uint32_t aSig;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3001
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3002	aSign = extractFloat16Sign(a);
				3003	aExp = extractFloat16Exp(a);
				3004	aSig = extractFloat16Frac(a);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3005
				3006	if (aExp == 0x1f && ieee) {
				3007	if (aSig) {
Peter Maydell	f591e1b	2011-02-10 11:28:59 +0000	[diff] [blame]	3008	return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3009	}
				3010	return packFloat32(aSign, 0xff, aSig << 13);
				3011	}
				3012	if (aExp == 0) {
				3013	int8 shiftCount;
				3014
				3015	if (aSig == 0) {
				3016	return packFloat32(aSign, 0, 0);
				3017	}
				3018
				3019	shiftCount = countLeadingZeros32( aSig ) - 21;
				3020	aSig = aSig << shiftCount;
				3021	aExp = -shiftCount;
				3022	}
				3023	return packFloat32( aSign, aExp + 0x70, aSig << 13);
				3024	}
				3025
Peter Maydell	bb4d4bb	2011-02-10 11:28:56 +0000	[diff] [blame]	3026	float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3027	{
				3028	flag aSign;
				3029	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3030	uint32_t aSig;
				3031	uint32_t mask;
				3032	uint32_t increment;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3033	int8 roundingMode;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3034	a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3035
				3036	aSig = extractFloat32Frac( a );
				3037	aExp = extractFloat32Exp( a );
				3038	aSign = extractFloat32Sign( a );
				3039	if ( aExp == 0xFF ) {
				3040	if (aSig) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3041	/* Input is a NaN */
				3042	float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
				3043	if (!ieee) {
				3044	return packFloat16(aSign, 0, 0);
				3045	}
				3046	return r;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3047	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3048	/* Infinity */
				3049	if (!ieee) {
				3050	float_raise(float_flag_invalid STATUS_VAR);
				3051	return packFloat16(aSign, 0x1f, 0x3ff);
				3052	}
				3053	return packFloat16(aSign, 0x1f, 0);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3054	}
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3055	if (aExp == 0 && aSig == 0) {
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3056	return packFloat16(aSign, 0, 0);
				3057	}
				3058	/* Decimal point between bits 22 and 23. */
				3059	aSig \|= 0x00800000;
				3060	aExp -= 0x7f;
				3061	if (aExp < -14) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3062	mask = 0x00ffffff;
				3063	if (aExp >= -24) {
				3064	mask >>= 25 + aExp;
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3065	}
				3066	} else {
				3067	mask = 0x00001fff;
				3068	}
				3069	if (aSig & mask) {
				3070	float_raise( float_flag_underflow STATUS_VAR );
				3071	roundingMode = STATUS(float_rounding_mode);
				3072	switch (roundingMode) {
				3073	case float_round_nearest_even:
				3074	increment = (mask + 1) >> 1;
				3075	if ((aSig & mask) == increment) {
				3076	increment = aSig & (increment << 1);
				3077	}
				3078	break;
				3079	case float_round_up:
				3080	increment = aSign ? 0 : mask;
				3081	break;
				3082	case float_round_down:
				3083	increment = aSign ? mask : 0;
				3084	break;
				3085	default: /* round_to_zero */
				3086	increment = 0;
				3087	break;
				3088	}
				3089	aSig += increment;
				3090	if (aSig >= 0x01000000) {
				3091	aSig >>= 1;
				3092	aExp++;
				3093	}
				3094	} else if (aExp < -14
				3095	&& STATUS(float_detect_tininess) == float_tininess_before_rounding) {
				3096	float_raise( float_flag_underflow STATUS_VAR);
				3097	}
				3098
				3099	if (ieee) {
				3100	if (aExp > 15) {
				3101	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				3102	return packFloat16(aSign, 0x1f, 0);
				3103	}
				3104	} else {
				3105	if (aExp > 16) {
Peter Maydell	600e30d	2011-02-10 11:28:58 +0000	[diff] [blame]	3106	float_raise(float_flag_invalid \| float_flag_inexact STATUS_VAR);
Paul Brook	6001149	2009-11-19 16:45:20 +0000	[diff] [blame]	3107	return packFloat16(aSign, 0x1f, 0x3ff);
				3108	}
				3109	}
				3110	if (aExp < -24) {
				3111	return packFloat16(aSign, 0, 0);
				3112	}
				3113	if (aExp < -14) {
				3114	aSig >>= -14 - aExp;
				3115	aExp = -14;
				3116	}
				3117	return packFloat16(aSign, aExp + 14, aSig >> 13);
				3118	}
				3119
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3120	/*----------------------------------------------------------------------------
				3121	\| Returns the result of converting the double-precision floating-point value
				3122	\| `a' to the extended double-precision floating-point format. The conversion
				3123	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				3124	\| Arithmetic.
				3125	----------------------------------------------------------------------------/
				3126
				3127	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				3128	{
				3129	flag aSign;
				3130	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3131	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3132
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3133	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3134	aSig = extractFloat64Frac( a );
				3135	aExp = extractFloat64Exp( a );
				3136	aSign = extractFloat64Sign( a );
				3137	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3138	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3139	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3140	}
				3141	if ( aExp == 0 ) {
				3142	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				3143	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3144	}
				3145	return
				3146	packFloatx80(
				3147	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				3148
				3149	}
				3150
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3151	/*----------------------------------------------------------------------------
				3152	\| Returns the result of converting the double-precision floating-point value
				3153	\| `a' to the quadruple-precision floating-point format. The conversion is
				3154	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3155	\| Arithmetic.
				3156	----------------------------------------------------------------------------/
				3157
				3158	float128 float64_to_float128( float64 a STATUS_PARAM )
				3159	{
				3160	flag aSign;
				3161	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3162	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3163
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3164	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3165	aSig = extractFloat64Frac( a );
				3166	aExp = extractFloat64Exp( a );
				3167	aSign = extractFloat64Sign( a );
				3168	if ( aExp == 0x7FF ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	3169	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3170	return packFloat128( aSign, 0x7FFF, 0, 0 );
				3171	}
				3172	if ( aExp == 0 ) {
				3173	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				3174	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3175	--aExp;
				3176	}
				3177	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				3178	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				3179
				3180	}
				3181
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3182	/*----------------------------------------------------------------------------
				3183	\| Rounds the double-precision floating-point value `a' to an integer, and
				3184	\| returns the result as a double-precision floating-point value. The
				3185	\| operation is performed according to the IEC/IEEE Standard for Binary
				3186	\| Floating-Point Arithmetic.
				3187	----------------------------------------------------------------------------/
				3188
				3189	float64 float64_round_to_int( float64 a STATUS_PARAM )
				3190	{
				3191	flag aSign;
				3192	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3193	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3194	int8 roundingMode;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3195	uint64_t z;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3196	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3197
				3198	aExp = extractFloat64Exp( a );
				3199	if ( 0x433 <= aExp ) {
				3200	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				3201	return propagateFloat64NaN( a, a STATUS_VAR );
				3202	}
				3203	return a;
				3204	}
				3205	if ( aExp < 0x3FF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3206	if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3207	STATUS(float_exception_flags) \|= float_flag_inexact;
				3208	aSign = extractFloat64Sign( a );
				3209	switch ( STATUS(float_rounding_mode) ) {
				3210	case float_round_nearest_even:
				3211	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				3212	return packFloat64( aSign, 0x3FF, 0 );
				3213	}
				3214	break;
				3215	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3216	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3217	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3218	return make_float64(
				3219	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3220	}
				3221	return packFloat64( aSign, 0, 0 );
				3222	}
				3223	lastBitMask = 1;
				3224	lastBitMask <<= 0x433 - aExp;
				3225	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3226	z = float64_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3227	roundingMode = STATUS(float_rounding_mode);
				3228	if ( roundingMode == float_round_nearest_even ) {
				3229	z += lastBitMask>>1;
				3230	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				3231	}
				3232	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3233	if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3234	z += roundBitsMask;
				3235	}
				3236	}
				3237	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3238	if ( z != float64_val(a) )
				3239	STATUS(float_exception_flags) \|= float_flag_inexact;
				3240	return make_float64(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3241
				3242	}
				3243
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	3244	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				3245	{
				3246	int oldmode;
				3247	float64 res;
				3248	oldmode = STATUS(float_rounding_mode);
				3249	STATUS(float_rounding_mode) = float_round_to_zero;
				3250	res = float64_round_to_int(a STATUS_VAR);
				3251	STATUS(float_rounding_mode) = oldmode;
				3252	return res;
				3253	}
				3254
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3255	/*----------------------------------------------------------------------------
				3256	\| Returns the result of adding the absolute values of the double-precision
				3257	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				3258	\| before being returned. `zSign' is ignored if the result is a NaN.
				3259	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3260	\| Floating-Point Arithmetic.
				3261	----------------------------------------------------------------------------/
				3262
				3263	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3264	{
				3265	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3266	uint64_t aSig, bSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3267	int16 expDiff;
				3268
				3269	aSig = extractFloat64Frac( a );
				3270	aExp = extractFloat64Exp( a );
				3271	bSig = extractFloat64Frac( b );
				3272	bExp = extractFloat64Exp( b );
				3273	expDiff = aExp - bExp;
				3274	aSig <<= 9;
				3275	bSig <<= 9;
				3276	if ( 0 < expDiff ) {
				3277	if ( aExp == 0x7FF ) {
				3278	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3279	return a;
				3280	}
				3281	if ( bExp == 0 ) {
				3282	--expDiff;
				3283	}
				3284	else {
				3285	bSig \|= LIT64( 0x2000000000000000 );
				3286	}
				3287	shift64RightJamming( bSig, expDiff, &bSig );
				3288	zExp = aExp;
				3289	}
				3290	else if ( expDiff < 0 ) {
				3291	if ( bExp == 0x7FF ) {
				3292	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3293	return packFloat64( zSign, 0x7FF, 0 );
				3294	}
				3295	if ( aExp == 0 ) {
				3296	++expDiff;
				3297	}
				3298	else {
				3299	aSig \|= LIT64( 0x2000000000000000 );
				3300	}
				3301	shift64RightJamming( aSig, - expDiff, &aSig );
				3302	zExp = bExp;
				3303	}
				3304	else {
				3305	if ( aExp == 0x7FF ) {
				3306	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3307	return a;
				3308	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3309	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	3310	if (STATUS(flush_to_zero)) {
				3311	if (aSig \| bSig) {
				3312	float_raise(float_flag_output_denormal STATUS_VAR);
				3313	}
				3314	return packFloat64(zSign, 0, 0);
				3315	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	3316	return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				3317	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3318	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				3319	zExp = aExp;
				3320	goto roundAndPack;
				3321	}
				3322	aSig \|= LIT64( 0x2000000000000000 );
				3323	zSig = ( aSig + bSig )<<1;
				3324	--zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3325	if ( (int64_t) zSig < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3326	zSig = aSig + bSig;
				3327	++zExp;
				3328	}
				3329	roundAndPack:
				3330	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3331
				3332	}
				3333
				3334	/*----------------------------------------------------------------------------
				3335	\| Returns the result of subtracting the absolute values of the double-
				3336	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				3337	\| difference is negated before being returned. `zSign' is ignored if the
				3338	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3339	\| Standard for Binary Floating-Point Arithmetic.
				3340	----------------------------------------------------------------------------/
				3341
				3342	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				3343	{
				3344	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3345	uint64_t aSig, bSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3346	int16 expDiff;
				3347
				3348	aSig = extractFloat64Frac( a );
				3349	aExp = extractFloat64Exp( a );
				3350	bSig = extractFloat64Frac( b );
				3351	bExp = extractFloat64Exp( b );
				3352	expDiff = aExp - bExp;
				3353	aSig <<= 10;
				3354	bSig <<= 10;
				3355	if ( 0 < expDiff ) goto aExpBigger;
				3356	if ( expDiff < 0 ) goto bExpBigger;
				3357	if ( aExp == 0x7FF ) {
				3358	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3359	float_raise( float_flag_invalid STATUS_VAR);
				3360	return float64_default_nan;
				3361	}
				3362	if ( aExp == 0 ) {
				3363	aExp = 1;
				3364	bExp = 1;
				3365	}
				3366	if ( bSig < aSig ) goto aBigger;
				3367	if ( aSig < bSig ) goto bBigger;
				3368	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3369	bExpBigger:
				3370	if ( bExp == 0x7FF ) {
				3371	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3372	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				3373	}
				3374	if ( aExp == 0 ) {
				3375	++expDiff;
				3376	}
				3377	else {
				3378	aSig \|= LIT64( 0x4000000000000000 );
				3379	}
				3380	shift64RightJamming( aSig, - expDiff, &aSig );
				3381	bSig \|= LIT64( 0x4000000000000000 );
				3382	bBigger:
				3383	zSig = bSig - aSig;
				3384	zExp = bExp;
				3385	zSign ^= 1;
				3386	goto normalizeRoundAndPack;
				3387	aExpBigger:
				3388	if ( aExp == 0x7FF ) {
				3389	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3390	return a;
				3391	}
				3392	if ( bExp == 0 ) {
				3393	--expDiff;
				3394	}
				3395	else {
				3396	bSig \|= LIT64( 0x4000000000000000 );
				3397	}
				3398	shift64RightJamming( bSig, expDiff, &bSig );
				3399	aSig \|= LIT64( 0x4000000000000000 );
				3400	aBigger:
				3401	zSig = aSig - bSig;
				3402	zExp = aExp;
				3403	normalizeRoundAndPack:
				3404	--zExp;
				3405	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3406
				3407	}
				3408
				3409	/*----------------------------------------------------------------------------
				3410	\| Returns the result of adding the double-precision floating-point values `a'
				3411	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				3412	\| Binary Floating-Point Arithmetic.
				3413	----------------------------------------------------------------------------/
				3414
				3415	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				3416	{
				3417	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3418	a = float64_squash_input_denormal(a STATUS_VAR);
				3419	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3420
				3421	aSign = extractFloat64Sign( a );
				3422	bSign = extractFloat64Sign( b );
				3423	if ( aSign == bSign ) {
				3424	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3425	}
				3426	else {
				3427	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3428	}
				3429
				3430	}
				3431
				3432	/*----------------------------------------------------------------------------
				3433	\| Returns the result of subtracting the double-precision floating-point values
				3434	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3435	\| for Binary Floating-Point Arithmetic.
				3436	----------------------------------------------------------------------------/
				3437
				3438	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				3439	{
				3440	flag aSign, bSign;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3441	a = float64_squash_input_denormal(a STATUS_VAR);
				3442	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3443
				3444	aSign = extractFloat64Sign( a );
				3445	bSign = extractFloat64Sign( b );
				3446	if ( aSign == bSign ) {
				3447	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				3448	}
				3449	else {
				3450	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				3451	}
				3452
				3453	}
				3454
				3455	/*----------------------------------------------------------------------------
				3456	\| Returns the result of multiplying the double-precision floating-point values
				3457	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				3458	\| for Binary Floating-Point Arithmetic.
				3459	----------------------------------------------------------------------------/
				3460
				3461	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				3462	{
				3463	flag aSign, bSign, zSign;
				3464	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3465	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3466
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3467	a = float64_squash_input_denormal(a STATUS_VAR);
				3468	b = float64_squash_input_denormal(b STATUS_VAR);
				3469
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3470	aSig = extractFloat64Frac( a );
				3471	aExp = extractFloat64Exp( a );
				3472	aSign = extractFloat64Sign( a );
				3473	bSig = extractFloat64Frac( b );
				3474	bExp = extractFloat64Exp( b );
				3475	bSign = extractFloat64Sign( b );
				3476	zSign = aSign ^ bSign;
				3477	if ( aExp == 0x7FF ) {
				3478	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3479	return propagateFloat64NaN( a, b STATUS_VAR );
				3480	}
				3481	if ( ( bExp \| bSig ) == 0 ) {
				3482	float_raise( float_flag_invalid STATUS_VAR);
				3483	return float64_default_nan;
				3484	}
				3485	return packFloat64( zSign, 0x7FF, 0 );
				3486	}
				3487	if ( bExp == 0x7FF ) {
				3488	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3489	if ( ( aExp \| aSig ) == 0 ) {
				3490	float_raise( float_flag_invalid STATUS_VAR);
				3491	return float64_default_nan;
				3492	}
				3493	return packFloat64( zSign, 0x7FF, 0 );
				3494	}
				3495	if ( aExp == 0 ) {
				3496	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3497	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3498	}
				3499	if ( bExp == 0 ) {
				3500	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				3501	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3502	}
				3503	zExp = aExp + bExp - 0x3FF;
				3504	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3505	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3506	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3507	zSig0 \|= ( zSig1 != 0 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3508	if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3509	zSig0 <<= 1;
				3510	--zExp;
				3511	}
				3512	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				3513
				3514	}
				3515
				3516	/*----------------------------------------------------------------------------
				3517	\| Returns the result of dividing the double-precision floating-point value `a'
				3518	\| by the corresponding value `b'. The operation is performed according to
				3519	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3520	----------------------------------------------------------------------------/
				3521
				3522	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				3523	{
				3524	flag aSign, bSign, zSign;
				3525	int16 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3526	uint64_t aSig, bSig, zSig;
				3527	uint64_t rem0, rem1;
				3528	uint64_t term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3529	a = float64_squash_input_denormal(a STATUS_VAR);
				3530	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3531
				3532	aSig = extractFloat64Frac( a );
				3533	aExp = extractFloat64Exp( a );
				3534	aSign = extractFloat64Sign( a );
				3535	bSig = extractFloat64Frac( b );
				3536	bExp = extractFloat64Exp( b );
				3537	bSign = extractFloat64Sign( b );
				3538	zSign = aSign ^ bSign;
				3539	if ( aExp == 0x7FF ) {
				3540	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3541	if ( bExp == 0x7FF ) {
				3542	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3543	float_raise( float_flag_invalid STATUS_VAR);
				3544	return float64_default_nan;
				3545	}
				3546	return packFloat64( zSign, 0x7FF, 0 );
				3547	}
				3548	if ( bExp == 0x7FF ) {
				3549	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3550	return packFloat64( zSign, 0, 0 );
				3551	}
				3552	if ( bExp == 0 ) {
				3553	if ( bSig == 0 ) {
				3554	if ( ( aExp \| aSig ) == 0 ) {
				3555	float_raise( float_flag_invalid STATUS_VAR);
				3556	return float64_default_nan;
				3557	}
				3558	float_raise( float_flag_divbyzero STATUS_VAR);
				3559	return packFloat64( zSign, 0x7FF, 0 );
				3560	}
				3561	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3562	}
				3563	if ( aExp == 0 ) {
				3564	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				3565	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3566	}
				3567	zExp = aExp - bExp + 0x3FD;
				3568	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				3569	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3570	if ( bSig <= ( aSig + aSig ) ) {
				3571	aSig >>= 1;
				3572	++zExp;
				3573	}
				3574	zSig = estimateDiv128To64( aSig, 0, bSig );
				3575	if ( ( zSig & 0x1FF ) <= 2 ) {
				3576	mul64To128( bSig, zSig, &term0, &term1 );
				3577	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3578	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3579	--zSig;
				3580	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3581	}
				3582	zSig \|= ( rem1 != 0 );
				3583	}
				3584	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				3585
				3586	}
				3587
				3588	/*----------------------------------------------------------------------------
				3589	\| Returns the remainder of the double-precision floating-point value `a'
				3590	\| with respect to the corresponding value `b'. The operation is performed
				3591	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3592	----------------------------------------------------------------------------/
				3593
				3594	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				3595	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	3596	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3597	int16 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3598	uint64_t aSig, bSig;
				3599	uint64_t q, alternateASig;
				3600	int64_t sigMean;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3601
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3602	a = float64_squash_input_denormal(a STATUS_VAR);
				3603	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3604	aSig = extractFloat64Frac( a );
				3605	aExp = extractFloat64Exp( a );
				3606	aSign = extractFloat64Sign( a );
				3607	bSig = extractFloat64Frac( b );
				3608	bExp = extractFloat64Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3609	if ( aExp == 0x7FF ) {
				3610	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				3611	return propagateFloat64NaN( a, b STATUS_VAR );
				3612	}
				3613	float_raise( float_flag_invalid STATUS_VAR);
				3614	return float64_default_nan;
				3615	}
				3616	if ( bExp == 0x7FF ) {
				3617	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				3618	return a;
				3619	}
				3620	if ( bExp == 0 ) {
				3621	if ( bSig == 0 ) {
				3622	float_raise( float_flag_invalid STATUS_VAR);
				3623	return float64_default_nan;
				3624	}
				3625	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				3626	}
				3627	if ( aExp == 0 ) {
				3628	if ( aSig == 0 ) return a;
				3629	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3630	}
				3631	expDiff = aExp - bExp;
				3632	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				3633	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				3634	if ( expDiff < 0 ) {
				3635	if ( expDiff < -1 ) return a;
				3636	aSig >>= 1;
				3637	}
				3638	q = ( bSig <= aSig );
				3639	if ( q ) aSig -= bSig;
				3640	expDiff -= 64;
				3641	while ( 0 < expDiff ) {
				3642	q = estimateDiv128To64( aSig, 0, bSig );
				3643	q = ( 2 < q ) ? q - 2 : 0;
				3644	aSig = - ( ( bSig>>2 ) * q );
				3645	expDiff -= 62;
				3646	}
				3647	expDiff += 64;
				3648	if ( 0 < expDiff ) {
				3649	q = estimateDiv128To64( aSig, 0, bSig );
				3650	q = ( 2 < q ) ? q - 2 : 0;
				3651	q >>= 64 - expDiff;
				3652	bSig >>= 2;
				3653	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				3654	}
				3655	else {
				3656	aSig >>= 2;
				3657	bSig >>= 2;
				3658	}
				3659	do {
				3660	alternateASig = aSig;
				3661	++q;
				3662	aSig -= bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3663	} while ( 0 <= (int64_t) aSig );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3664	sigMean = aSig + alternateASig;
				3665	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				3666	aSig = alternateASig;
				3667	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3668	zSign = ( (int64_t) aSig < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3669	if ( zSign ) aSig = - aSig;
				3670	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				3671
				3672	}
				3673
				3674	/*----------------------------------------------------------------------------
Peter Maydell	369be8f	2011-10-19 16:14:06 +0000	[diff] [blame]	3675	\| Returns the result of multiplying the double-precision floating-point values
				3676	\| `a' and `b' then adding 'c', with no intermediate rounding step after the
				3677	\| multiplication. The operation is performed according to the IEC/IEEE
				3678	\| Standard for Binary Floating-Point Arithmetic 754-2008.
				3679	\| The flags argument allows the caller to select negation of the
				3680	\| addend, the intermediate product, or the final result. (The difference
				3681	\| between this and having the caller do a separate negation is that negating
				3682	\| externally will flip the sign bit on NaNs.)
				3683	----------------------------------------------------------------------------/
				3684
				3685	float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
				3686	{
				3687	flag aSign, bSign, cSign, zSign;
				3688	int aExp, bExp, cExp, pExp, zExp, expDiff;
				3689	uint64_t aSig, bSig, cSig;
				3690	flag pInf, pZero, pSign;
				3691	uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
				3692	int shiftcount;
				3693	flag signflip, infzero;
				3694
				3695	a = float64_squash_input_denormal(a STATUS_VAR);
				3696	b = float64_squash_input_denormal(b STATUS_VAR);
				3697	c = float64_squash_input_denormal(c STATUS_VAR);
				3698	aSig = extractFloat64Frac(a);
				3699	aExp = extractFloat64Exp(a);
				3700	aSign = extractFloat64Sign(a);
				3701	bSig = extractFloat64Frac(b);
				3702	bExp = extractFloat64Exp(b);
				3703	bSign = extractFloat64Sign(b);
				3704	cSig = extractFloat64Frac(c);
				3705	cExp = extractFloat64Exp(c);
				3706	cSign = extractFloat64Sign(c);
				3707
				3708	infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) \|\|
				3709	(aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
				3710
				3711	/* It is implementation-defined whether the cases of (0,inf,qnan)
				3712	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
				3713	* they return if they do), so we have to hand this information
				3714	* off to the target-specific pick-a-NaN routine.
				3715	*/
				3716	if (((aExp == 0x7ff) && aSig) \|\|
				3717	((bExp == 0x7ff) && bSig) \|\|
				3718	((cExp == 0x7ff) && cSig)) {
				3719	return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
				3720	}
				3721
				3722	if (infzero) {
				3723	float_raise(float_flag_invalid STATUS_VAR);
				3724	return float64_default_nan;
				3725	}
				3726
				3727	if (flags & float_muladd_negate_c) {
				3728	cSign ^= 1;
				3729	}
				3730
				3731	signflip = (flags & float_muladd_negate_result) ? 1 : 0;
				3732
				3733	/* Work out the sign and type of the product */
				3734	pSign = aSign ^ bSign;
				3735	if (flags & float_muladd_negate_product) {
				3736	pSign ^= 1;
				3737	}
				3738	pInf = (aExp == 0x7ff) \|\| (bExp == 0x7ff);
				3739	pZero = ((aExp \| aSig) == 0) \|\| ((bExp \| bSig) == 0);
				3740
				3741	if (cExp == 0x7ff) {
				3742	if (pInf && (pSign ^ cSign)) {
				3743	/* addition of opposite-signed infinities => InvalidOperation */
				3744	float_raise(float_flag_invalid STATUS_VAR);
				3745	return float64_default_nan;
				3746	}
				3747	/* Otherwise generate an infinity of the same sign */
				3748	return packFloat64(cSign ^ signflip, 0x7ff, 0);
				3749	}
				3750
				3751	if (pInf) {
				3752	return packFloat64(pSign ^ signflip, 0x7ff, 0);
				3753	}
				3754
				3755	if (pZero) {
				3756	if (cExp == 0) {
				3757	if (cSig == 0) {
				3758	/* Adding two exact zeroes */
				3759	if (pSign == cSign) {
				3760	zSign = pSign;
				3761	} else if (STATUS(float_rounding_mode) == float_round_down) {
				3762	zSign = 1;
				3763	} else {
				3764	zSign = 0;
				3765	}
				3766	return packFloat64(zSign ^ signflip, 0, 0);
				3767	}
				3768	/* Exact zero plus a denorm */
				3769	if (STATUS(flush_to_zero)) {
				3770	float_raise(float_flag_output_denormal STATUS_VAR);
				3771	return packFloat64(cSign ^ signflip, 0, 0);
				3772	}
				3773	}
				3774	/* Zero plus something non-zero : just return the something */
				3775	return c ^ ((uint64_t)signflip << 63);
				3776	}
				3777
				3778	if (aExp == 0) {
				3779	normalizeFloat64Subnormal(aSig, &aExp, &aSig);
				3780	}
				3781	if (bExp == 0) {
				3782	normalizeFloat64Subnormal(bSig, &bExp, &bSig);
				3783	}
				3784
				3785	/* Calculate the actual result a * b + c */
				3786
				3787	/* Multiply first; this is easy. */
				3788	/* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
				3789	* because we want the true exponent, not the "one-less-than"
				3790	* flavour that roundAndPackFloat64() takes.
				3791	*/
				3792	pExp = aExp + bExp - 0x3fe;
				3793	aSig = (aSig \| LIT64(0x0010000000000000))<<10;
				3794	bSig = (bSig \| LIT64(0x0010000000000000))<<11;
				3795	mul64To128(aSig, bSig, &pSig0, &pSig1);
				3796	if ((int64_t)(pSig0 << 1) >= 0) {
				3797	shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
				3798	pExp--;
				3799	}
				3800
				3801	zSign = pSign ^ signflip;
				3802
				3803	/* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
				3804	* bit in position 126.
				3805	*/
				3806	if (cExp == 0) {
				3807	if (!cSig) {
				3808	/* Throw out the special case of c being an exact zero now */
				3809	shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
				3810	return roundAndPackFloat64(zSign, pExp - 1,
				3811	pSig1 STATUS_VAR);
				3812	}
				3813	normalizeFloat64Subnormal(cSig, &cExp, &cSig);
				3814	}
				3815
				3816	/* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
				3817	* significand of the addend, with the explicit bit in position 126.
				3818	*/
				3819	cSig0 = cSig << (126 - 64 - 52);
				3820	cSig1 = 0;
				3821	cSig0 \|= LIT64(0x4000000000000000);
				3822	expDiff = pExp - cExp;
				3823
				3824	if (pSign == cSign) {
				3825	/* Addition */
				3826	if (expDiff > 0) {
				3827	/* scale c to match p */
				3828	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				3829	zExp = pExp;
				3830	} else if (expDiff < 0) {
				3831	/* scale p to match c */
				3832	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				3833	zExp = cExp;
				3834	} else {
				3835	/* no scaling needed */
				3836	zExp = cExp;
				3837	}
				3838	/* Add significands and make sure explicit bit ends up in posn 126 */
				3839	add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3840	if ((int64_t)zSig0 < 0) {
				3841	shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
				3842	} else {
				3843	zExp--;
				3844	}
				3845	shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
				3846	return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
				3847	} else {
				3848	/* Subtraction */
				3849	if (expDiff > 0) {
				3850	shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
				3851	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3852	zExp = pExp;
				3853	} else if (expDiff < 0) {
				3854	shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
				3855	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				3856	zExp = cExp;
				3857	zSign ^= 1;
				3858	} else {
				3859	zExp = pExp;
				3860	if (lt128(cSig0, cSig1, pSig0, pSig1)) {
				3861	sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
				3862	} else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
				3863	sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
				3864	zSign ^= 1;
				3865	} else {
				3866	/* Exact zero */
				3867	zSign = signflip;
				3868	if (STATUS(float_rounding_mode) == float_round_down) {
				3869	zSign ^= 1;
				3870	}
				3871	return packFloat64(zSign, 0, 0);
				3872	}
				3873	}
				3874	--zExp;
				3875	/* Do the equivalent of normalizeRoundAndPackFloat64() but
				3876	* starting with the significand in a pair of uint64_t.
				3877	*/
				3878	if (zSig0) {
				3879	shiftcount = countLeadingZeros64(zSig0) - 1;
				3880	shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
				3881	if (zSig1) {
				3882	zSig0 \|= 1;
				3883	}
				3884	zExp -= shiftcount;
				3885	} else {
				3886	shiftcount = countLeadingZeros64(zSig1) - 1;
				3887	zSig0 = zSig1 << shiftcount;
				3888	zExp -= (shiftcount + 64);
				3889	}
				3890	return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
				3891	}
				3892	}
				3893
				3894	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3895	\| Returns the square root of the double-precision floating-point value `a'.
				3896	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3897	\| Floating-Point Arithmetic.
				3898	----------------------------------------------------------------------------/
				3899
				3900	float64 float64_sqrt( float64 a STATUS_PARAM )
				3901	{
				3902	flag aSign;
				3903	int16 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3904	uint64_t aSig, zSig, doubleZSig;
				3905	uint64_t rem0, rem1, term0, term1;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3906	a = float64_squash_input_denormal(a STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3907
				3908	aSig = extractFloat64Frac( a );
				3909	aExp = extractFloat64Exp( a );
				3910	aSign = extractFloat64Sign( a );
				3911	if ( aExp == 0x7FF ) {
				3912	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				3913	if ( ! aSign ) return a;
				3914	float_raise( float_flag_invalid STATUS_VAR);
				3915	return float64_default_nan;
				3916	}
				3917	if ( aSign ) {
				3918	if ( ( aExp \| aSig ) == 0 ) return a;
				3919	float_raise( float_flag_invalid STATUS_VAR);
				3920	return float64_default_nan;
				3921	}
				3922	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3923	if ( aSig == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3924	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3925	}
				3926	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				3927	aSig \|= LIT64( 0x0010000000000000 );
				3928	zSig = estimateSqrt32( aExp, aSig>>21 );
				3929	aSig <<= 9 - ( aExp & 1 );
				3930	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				3931	if ( ( zSig & 0x1FF ) <= 5 ) {
				3932	doubleZSig = zSig<<1;
				3933	mul64To128( zSig, zSig, &term0, &term1 );
				3934	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3935	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3936	--zSig;
				3937	doubleZSig -= 2;
				3938	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				3939	}
				3940	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				3941	}
				3942	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				3943
				3944	}
				3945
				3946	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3947	\| Returns the binary log of the double-precision floating-point value `a'.
				3948	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3949	\| Floating-Point Arithmetic.
				3950	----------------------------------------------------------------------------/
				3951	float64 float64_log2( float64 a STATUS_PARAM )
				3952	{
				3953	flag aSign, zSign;
				3954	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3955	uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	3956	a = float64_squash_input_denormal(a STATUS_VAR);
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3957
				3958	aSig = extractFloat64Frac( a );
				3959	aExp = extractFloat64Exp( a );
				3960	aSign = extractFloat64Sign( a );
				3961
				3962	if ( aExp == 0 ) {
				3963	if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
				3964	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3965	}
				3966	if ( aSign ) {
				3967	float_raise( float_flag_invalid STATUS_VAR);
				3968	return float64_default_nan;
				3969	}
				3970	if ( aExp == 0x7FF ) {
				3971	if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
				3972	return a;
				3973	}
				3974
				3975	aExp -= 0x3FF;
				3976	aSig \|= LIT64( 0x0010000000000000 );
				3977	zSign = aExp < 0;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	3978	zSig = (uint64_t)aExp << 52;
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3979	for (i = 1LL << 51; i > 0; i >>= 1) {
				3980	mul64To128( aSig, aSig, &aSig0, &aSig1 );
				3981	aSig = ( aSig0 << 12 ) \| ( aSig1 >> 52 );
				3982	if ( aSig & LIT64( 0x0020000000000000 ) ) {
				3983	aSig >>= 1;
				3984	zSig \|= i;
				3985	}
				3986	}
				3987
				3988	if ( zSign )
				3989	zSig = -zSig;
				3990	return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
				3991	}
				3992
				3993	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3994	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	3995	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				3996	\| if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3997	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3998	----------------------------------------------------------------------------/
				3999
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4000	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4001	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4002	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4003	a = float64_squash_input_denormal(a STATUS_VAR);
				4004	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4005
				4006	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4007	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4008	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4009	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4010	return 0;
				4011	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4012	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4013	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4014	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4015
				4016	}
				4017
				4018	/*----------------------------------------------------------------------------
				4019	\| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4020	\| equal to the corresponding value `b', and 0 otherwise. The invalid
				4021	\| exception is raised if either operand is a NaN. The comparison is performed
				4022	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4023	----------------------------------------------------------------------------/
				4024
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4025	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4026	{
				4027	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4028	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4029	a = float64_squash_input_denormal(a STATUS_VAR);
				4030	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4031
				4032	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4033	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4034	) {
				4035	float_raise( float_flag_invalid STATUS_VAR);
				4036	return 0;
				4037	}
				4038	aSign = extractFloat64Sign( a );
				4039	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4040	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4041	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4042	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4043	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4044
				4045	}
				4046
				4047	/*----------------------------------------------------------------------------
				4048	\| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4049	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				4050	\| raised if either operand is a NaN. The comparison is performed according
				4051	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4052	----------------------------------------------------------------------------/
				4053
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4054	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4055	{
				4056	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4057	uint64_t av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4058
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4059	a = float64_squash_input_denormal(a STATUS_VAR);
				4060	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4061	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4062	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4063	) {
				4064	float_raise( float_flag_invalid STATUS_VAR);
				4065	return 0;
				4066	}
				4067	aSign = extractFloat64Sign( a );
				4068	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4069	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4070	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4071	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4072	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4073
				4074	}
				4075
				4076	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4077	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4078	\| be compared, and 0 otherwise. The invalid exception is raised if either
				4079	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				4080	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4081	----------------------------------------------------------------------------/
				4082
				4083	int float64_unordered( float64 a, float64 b STATUS_PARAM )
				4084	{
				4085	a = float64_squash_input_denormal(a STATUS_VAR);
				4086	b = float64_squash_input_denormal(b STATUS_VAR);
				4087
				4088	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4089	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4090	) {
				4091	float_raise( float_flag_invalid STATUS_VAR);
				4092	return 1;
				4093	}
				4094	return 0;
				4095	}
				4096
				4097	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4098	\| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	4099	\| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4100	\| exception.The comparison is performed according to the IEC/IEEE Standard
				4101	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4102	----------------------------------------------------------------------------/
				4103
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4104	int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4105	{
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4106	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4107	a = float64_squash_input_denormal(a STATUS_VAR);
				4108	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4109
				4110	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4111	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4112	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	4113	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4114	float_raise( float_flag_invalid STATUS_VAR);
				4115	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4116	return 0;
				4117	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4118	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4119	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4120	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4121
				4122	}
				4123
				4124	/*----------------------------------------------------------------------------
				4125	\| Returns 1 if the double-precision floating-point value `a' is less than or
				4126	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				4127	\| cause an exception. Otherwise, the comparison is performed according to the
				4128	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4129	----------------------------------------------------------------------------/
				4130
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4131	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4132	{
				4133	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4134	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4135	a = float64_squash_input_denormal(a STATUS_VAR);
				4136	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4137
				4138	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4139	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4140	) {
				4141	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4142	float_raise( float_flag_invalid STATUS_VAR);
				4143	}
				4144	return 0;
				4145	}
				4146	aSign = extractFloat64Sign( a );
				4147	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4148	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4149	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4150	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<1 ) == 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4151	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4152
				4153	}
				4154
				4155	/*----------------------------------------------------------------------------
				4156	\| Returns 1 if the double-precision floating-point value `a' is less than
				4157	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				4158	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				4159	\| Standard for Binary Floating-Point Arithmetic.
				4160	----------------------------------------------------------------------------/
				4161
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4162	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4163	{
				4164	flag aSign, bSign;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4165	uint64_t av, bv;
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	4166	a = float64_squash_input_denormal(a STATUS_VAR);
				4167	b = float64_squash_input_denormal(b STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4168
				4169	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4170	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4171	) {
				4172	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4173	float_raise( float_flag_invalid STATUS_VAR);
				4174	}
				4175	return 0;
				4176	}
				4177	aSign = extractFloat64Sign( a );
				4178	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4179	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	4180	bv = float64_val(b);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4181	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<1 ) != 0 );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	4182	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4183
				4184	}
				4185
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	4186	/*----------------------------------------------------------------------------
				4187	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
				4188	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				4189	\| comparison is performed according to the IEC/IEEE Standard for Binary
				4190	\| Floating-Point Arithmetic.
				4191	----------------------------------------------------------------------------/
				4192
				4193	int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
				4194	{
				4195	a = float64_squash_input_denormal(a STATUS_VAR);
				4196	b = float64_squash_input_denormal(b STATUS_VAR);
				4197
				4198	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				4199	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				4200	) {
				4201	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				4202	float_raise( float_flag_invalid STATUS_VAR);
				4203	}
				4204	return 1;
				4205	}
				4206	return 0;
				4207	}
				4208
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4209	/*----------------------------------------------------------------------------
				4210	\| Returns the result of converting the extended double-precision floating-
				4211	\| point value `a' to the 32-bit two's complement integer format. The
				4212	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4213	\| Floating-Point Arithmetic---which means in particular that the conversion
				4214	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				4215	\| largest positive integer is returned. Otherwise, if the conversion
				4216	\| overflows, the largest integer with the same sign as `a' is returned.
				4217	----------------------------------------------------------------------------/
				4218
				4219	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				4220	{
				4221	flag aSign;
				4222	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4223	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4224
				4225	aSig = extractFloatx80Frac( a );
				4226	aExp = extractFloatx80Exp( a );
				4227	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4228	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4229	shiftCount = 0x4037 - aExp;
				4230	if ( shiftCount <= 0 ) shiftCount = 1;
				4231	shift64RightJamming( aSig, shiftCount, &aSig );
				4232	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				4233
				4234	}
				4235
				4236	/*----------------------------------------------------------------------------
				4237	\| Returns the result of converting the extended double-precision floating-
				4238	\| point value `a' to the 32-bit two's complement integer format. The
				4239	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4240	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4241	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4242	\| Otherwise, if the conversion overflows, the largest integer with the same
				4243	\| sign as `a' is returned.
				4244	----------------------------------------------------------------------------/
				4245
				4246	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				4247	{
				4248	flag aSign;
				4249	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4250	uint64_t aSig, savedASig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4251	int32 z;
				4252
				4253	aSig = extractFloatx80Frac( a );
				4254	aExp = extractFloatx80Exp( a );
				4255	aSign = extractFloatx80Sign( a );
				4256	if ( 0x401E < aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4257	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4258	goto invalid;
				4259	}
				4260	else if ( aExp < 0x3FFF ) {
				4261	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4262	return 0;
				4263	}
				4264	shiftCount = 0x403E - aExp;
				4265	savedASig = aSig;
				4266	aSig >>= shiftCount;
				4267	z = aSig;
				4268	if ( aSign ) z = - z;
				4269	if ( ( z < 0 ) ^ aSign ) {
				4270	invalid:
				4271	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4272	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4273	}
				4274	if ( ( aSig<<shiftCount ) != savedASig ) {
				4275	STATUS(float_exception_flags) \|= float_flag_inexact;
				4276	}
				4277	return z;
				4278
				4279	}
				4280
				4281	/*----------------------------------------------------------------------------
				4282	\| Returns the result of converting the extended double-precision floating-
				4283	\| point value `a' to the 64-bit two's complement integer format. The
				4284	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4285	\| Floating-Point Arithmetic---which means in particular that the conversion
				4286	\| is rounded according to the current rounding mode. If `a' is a NaN,
				4287	\| the largest positive integer is returned. Otherwise, if the conversion
				4288	\| overflows, the largest integer with the same sign as `a' is returned.
				4289	----------------------------------------------------------------------------/
				4290
				4291	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				4292	{
				4293	flag aSign;
				4294	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4295	uint64_t aSig, aSigExtra;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4296
				4297	aSig = extractFloatx80Frac( a );
				4298	aExp = extractFloatx80Exp( a );
				4299	aSign = extractFloatx80Sign( a );
				4300	shiftCount = 0x403E - aExp;
				4301	if ( shiftCount <= 0 ) {
				4302	if ( shiftCount ) {
				4303	float_raise( float_flag_invalid STATUS_VAR);
				4304	if ( ! aSign
				4305	\|\| ( ( aExp == 0x7FFF )
				4306	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				4307	) {
				4308	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4309	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4310	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4311	}
				4312	aSigExtra = 0;
				4313	}
				4314	else {
				4315	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				4316	}
				4317	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				4318
				4319	}
				4320
				4321	/*----------------------------------------------------------------------------
				4322	\| Returns the result of converting the extended double-precision floating-
				4323	\| point value `a' to the 64-bit two's complement integer format. The
				4324	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4325	\| Floating-Point Arithmetic, except that the conversion is always rounded
				4326	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				4327	\| Otherwise, if the conversion overflows, the largest integer with the same
				4328	\| sign as `a' is returned.
				4329	----------------------------------------------------------------------------/
				4330
				4331	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				4332	{
				4333	flag aSign;
				4334	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4335	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4336	int64 z;
				4337
				4338	aSig = extractFloatx80Frac( a );
				4339	aExp = extractFloatx80Exp( a );
				4340	aSign = extractFloatx80Sign( a );
				4341	shiftCount = aExp - 0x403E;
				4342	if ( 0 <= shiftCount ) {
				4343	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				4344	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				4345	float_raise( float_flag_invalid STATUS_VAR);
				4346	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				4347	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4348	}
				4349	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4350	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4351	}
				4352	else if ( aExp < 0x3FFF ) {
				4353	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4354	return 0;
				4355	}
				4356	z = aSig>>( - shiftCount );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4357	if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4358	STATUS(float_exception_flags) \|= float_flag_inexact;
				4359	}
				4360	if ( aSign ) z = - z;
				4361	return z;
				4362
				4363	}
				4364
				4365	/*----------------------------------------------------------------------------
				4366	\| Returns the result of converting the extended double-precision floating-
				4367	\| point value `a' to the single-precision floating-point format. The
				4368	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4369	\| Floating-Point Arithmetic.
				4370	----------------------------------------------------------------------------/
				4371
				4372	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				4373	{
				4374	flag aSign;
				4375	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4376	uint64_t aSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4377
				4378	aSig = extractFloatx80Frac( a );
				4379	aExp = extractFloatx80Exp( a );
				4380	aSign = extractFloatx80Sign( a );
				4381	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4382	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4383	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4384	}
				4385	return packFloat32( aSign, 0xFF, 0 );
				4386	}
				4387	shift64RightJamming( aSig, 33, &aSig );
				4388	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				4389	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				4390
				4391	}
				4392
				4393	/*----------------------------------------------------------------------------
				4394	\| Returns the result of converting the extended double-precision floating-
				4395	\| point value `a' to the double-precision floating-point format. The
				4396	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4397	\| Floating-Point Arithmetic.
				4398	----------------------------------------------------------------------------/
				4399
				4400	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				4401	{
				4402	flag aSign;
				4403	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4404	uint64_t aSig, zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4405
				4406	aSig = extractFloatx80Frac( a );
				4407	aExp = extractFloatx80Exp( a );
				4408	aSign = extractFloatx80Sign( a );
				4409	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4410	if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4411	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4412	}
				4413	return packFloat64( aSign, 0x7FF, 0 );
				4414	}
				4415	shift64RightJamming( aSig, 1, &zSig );
				4416	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				4417	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				4418
				4419	}
				4420
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4421	/*----------------------------------------------------------------------------
				4422	\| Returns the result of converting the extended double-precision floating-
				4423	\| point value `a' to the quadruple-precision floating-point format. The
				4424	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4425	\| Floating-Point Arithmetic.
				4426	----------------------------------------------------------------------------/
				4427
				4428	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				4429	{
				4430	flag aSign;
				4431	int16 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4432	uint64_t aSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4433
				4434	aSig = extractFloatx80Frac( a );
				4435	aExp = extractFloatx80Exp( a );
				4436	aSign = extractFloatx80Sign( a );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4437	if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	4438	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4439	}
				4440	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				4441	return packFloat128( aSign, aExp, zSig0, zSig1 );
				4442
				4443	}
				4444
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4445	/*----------------------------------------------------------------------------
				4446	\| Rounds the extended double-precision floating-point value `a' to an integer,
				4447	\| and returns the result as an extended quadruple-precision floating-point
				4448	\| value. The operation is performed according to the IEC/IEEE Standard for
				4449	\| Binary Floating-Point Arithmetic.
				4450	----------------------------------------------------------------------------/
				4451
				4452	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				4453	{
				4454	flag aSign;
				4455	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4456	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4457	int8 roundingMode;
				4458	floatx80 z;
				4459
				4460	aExp = extractFloatx80Exp( a );
				4461	if ( 0x403E <= aExp ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4462	if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4463	return propagateFloatx80NaN( a, a STATUS_VAR );
				4464	}
				4465	return a;
				4466	}
				4467	if ( aExp < 0x3FFF ) {
				4468	if ( ( aExp == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4469	&& ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4470	return a;
				4471	}
				4472	STATUS(float_exception_flags) \|= float_flag_inexact;
				4473	aSign = extractFloatx80Sign( a );
				4474	switch ( STATUS(float_rounding_mode) ) {
				4475	case float_round_nearest_even:
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4476	if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4477	) {
				4478	return
				4479	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4480	}
				4481	break;
				4482	case float_round_down:
				4483	return
				4484	aSign ?
				4485	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				4486	: packFloatx80( 0, 0, 0 );
				4487	case float_round_up:
				4488	return
				4489	aSign ? packFloatx80( 1, 0, 0 )
				4490	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				4491	}
				4492	return packFloatx80( aSign, 0, 0 );
				4493	}
				4494	lastBitMask = 1;
				4495	lastBitMask <<= 0x403E - aExp;
				4496	roundBitsMask = lastBitMask - 1;
				4497	z = a;
				4498	roundingMode = STATUS(float_rounding_mode);
				4499	if ( roundingMode == float_round_nearest_even ) {
				4500	z.low += lastBitMask>>1;
				4501	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				4502	}
				4503	else if ( roundingMode != float_round_to_zero ) {
				4504	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				4505	z.low += roundBitsMask;
				4506	}
				4507	}
				4508	z.low &= ~ roundBitsMask;
				4509	if ( z.low == 0 ) {
				4510	++z.high;
				4511	z.low = LIT64( 0x8000000000000000 );
				4512	}
				4513	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4514	return z;
				4515
				4516	}
				4517
				4518	/*----------------------------------------------------------------------------
				4519	\| Returns the result of adding the absolute values of the extended double-
				4520	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				4521	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				4522	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4523	\| Floating-Point Arithmetic.
				4524	----------------------------------------------------------------------------/
				4525
				4526	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				4527	{
				4528	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4529	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4530	int32 expDiff;
				4531
				4532	aSig = extractFloatx80Frac( a );
				4533	aExp = extractFloatx80Exp( a );
				4534	bSig = extractFloatx80Frac( b );
				4535	bExp = extractFloatx80Exp( b );
				4536	expDiff = aExp - bExp;
				4537	if ( 0 < expDiff ) {
				4538	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4539	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4540	return a;
				4541	}
				4542	if ( bExp == 0 ) --expDiff;
				4543	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				4544	zExp = aExp;
				4545	}
				4546	else if ( expDiff < 0 ) {
				4547	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4548	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4549	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4550	}
				4551	if ( aExp == 0 ) ++expDiff;
				4552	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				4553	zExp = bExp;
				4554	}
				4555	else {
				4556	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4557	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4558	return propagateFloatx80NaN( a, b STATUS_VAR );
				4559	}
				4560	return a;
				4561	}
				4562	zSig1 = 0;
				4563	zSig0 = aSig + bSig;
				4564	if ( aExp == 0 ) {
				4565	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				4566	goto roundAndPack;
				4567	}
				4568	zExp = aExp;
				4569	goto shiftRight1;
				4570	}
				4571	zSig0 = aSig + bSig;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4572	if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4573	shiftRight1:
				4574	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				4575	zSig0 \|= LIT64( 0x8000000000000000 );
				4576	++zExp;
				4577	roundAndPack:
				4578	return
				4579	roundAndPackFloatx80(
				4580	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4581
				4582	}
				4583
				4584	/*----------------------------------------------------------------------------
				4585	\| Returns the result of subtracting the absolute values of the extended
				4586	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				4587	\| difference is negated before being returned. `zSign' is ignored if the
				4588	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4589	\| Standard for Binary Floating-Point Arithmetic.
				4590	----------------------------------------------------------------------------/
				4591
				4592	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				4593	{
				4594	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4595	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4596	int32 expDiff;
				4597	floatx80 z;
				4598
				4599	aSig = extractFloatx80Frac( a );
				4600	aExp = extractFloatx80Exp( a );
				4601	bSig = extractFloatx80Frac( b );
				4602	bExp = extractFloatx80Exp( b );
				4603	expDiff = aExp - bExp;
				4604	if ( 0 < expDiff ) goto aExpBigger;
				4605	if ( expDiff < 0 ) goto bExpBigger;
				4606	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4607	if ( (uint64_t) ( ( aSig \| bSig )<<1 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4608	return propagateFloatx80NaN( a, b STATUS_VAR );
				4609	}
				4610	float_raise( float_flag_invalid STATUS_VAR);
				4611	z.low = floatx80_default_nan_low;
				4612	z.high = floatx80_default_nan_high;
				4613	return z;
				4614	}
				4615	if ( aExp == 0 ) {
				4616	aExp = 1;
				4617	bExp = 1;
				4618	}
				4619	zSig1 = 0;
				4620	if ( bSig < aSig ) goto aBigger;
				4621	if ( aSig < bSig ) goto bBigger;
				4622	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				4623	bExpBigger:
				4624	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4625	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4626	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4627	}
				4628	if ( aExp == 0 ) ++expDiff;
				4629	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				4630	bBigger:
				4631	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				4632	zExp = bExp;
				4633	zSign ^= 1;
				4634	goto normalizeRoundAndPack;
				4635	aExpBigger:
				4636	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4637	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4638	return a;
				4639	}
				4640	if ( bExp == 0 ) --expDiff;
				4641	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				4642	aBigger:
				4643	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				4644	zExp = aExp;
				4645	normalizeRoundAndPack:
				4646	return
				4647	normalizeRoundAndPackFloatx80(
				4648	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4649
				4650	}
				4651
				4652	/*----------------------------------------------------------------------------
				4653	\| Returns the result of adding the extended double-precision floating-point
				4654	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4655	\| Standard for Binary Floating-Point Arithmetic.
				4656	----------------------------------------------------------------------------/
				4657
				4658	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				4659	{
				4660	flag aSign, bSign;
				4661
				4662	aSign = extractFloatx80Sign( a );
				4663	bSign = extractFloatx80Sign( b );
				4664	if ( aSign == bSign ) {
				4665	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				4666	}
				4667	else {
				4668	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				4669	}
				4670
				4671	}
				4672
				4673	/*----------------------------------------------------------------------------
				4674	\| Returns the result of subtracting the extended double-precision floating-
				4675	\| point values `a' and `b'. The operation is performed according to the
				4676	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4677	----------------------------------------------------------------------------/
				4678
				4679	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				4680	{
				4681	flag aSign, bSign;
				4682
				4683	aSign = extractFloatx80Sign( a );
				4684	bSign = extractFloatx80Sign( b );
				4685	if ( aSign == bSign ) {
				4686	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				4687	}
				4688	else {
				4689	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				4690	}
				4691
				4692	}
				4693
				4694	/*----------------------------------------------------------------------------
				4695	\| Returns the result of multiplying the extended double-precision floating-
				4696	\| point values `a' and `b'. The operation is performed according to the
				4697	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4698	----------------------------------------------------------------------------/
				4699
				4700	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				4701	{
				4702	flag aSign, bSign, zSign;
				4703	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4704	uint64_t aSig, bSig, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4705	floatx80 z;
				4706
				4707	aSig = extractFloatx80Frac( a );
				4708	aExp = extractFloatx80Exp( a );
				4709	aSign = extractFloatx80Sign( a );
				4710	bSig = extractFloatx80Frac( b );
				4711	bExp = extractFloatx80Exp( b );
				4712	bSign = extractFloatx80Sign( b );
				4713	zSign = aSign ^ bSign;
				4714	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4715	if ( (uint64_t) ( aSig<<1 )
				4716	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4717	return propagateFloatx80NaN( a, b STATUS_VAR );
				4718	}
				4719	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				4720	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4721	}
				4722	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4723	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4724	if ( ( aExp \| aSig ) == 0 ) {
				4725	invalid:
				4726	float_raise( float_flag_invalid STATUS_VAR);
				4727	z.low = floatx80_default_nan_low;
				4728	z.high = floatx80_default_nan_high;
				4729	return z;
				4730	}
				4731	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4732	}
				4733	if ( aExp == 0 ) {
				4734	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4735	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				4736	}
				4737	if ( bExp == 0 ) {
				4738	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4739	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4740	}
				4741	zExp = aExp + bExp - 0x3FFE;
				4742	mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4743	if ( 0 < (int64_t) zSig0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4744	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				4745	--zExp;
				4746	}
				4747	return
				4748	roundAndPackFloatx80(
				4749	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4750
				4751	}
				4752
				4753	/*----------------------------------------------------------------------------
				4754	\| Returns the result of dividing the extended double-precision floating-point
				4755	\| value `a' by the corresponding value `b'. The operation is performed
				4756	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4757	----------------------------------------------------------------------------/
				4758
				4759	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				4760	{
				4761	flag aSign, bSign, zSign;
				4762	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4763	uint64_t aSig, bSig, zSig0, zSig1;
				4764	uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4765	floatx80 z;
				4766
				4767	aSig = extractFloatx80Frac( a );
				4768	aExp = extractFloatx80Exp( a );
				4769	aSign = extractFloatx80Sign( a );
				4770	bSig = extractFloatx80Frac( b );
				4771	bExp = extractFloatx80Exp( b );
				4772	bSign = extractFloatx80Sign( b );
				4773	zSign = aSign ^ bSign;
				4774	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4775	if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4776	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4777	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4778	goto invalid;
				4779	}
				4780	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4781	}
				4782	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4783	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4784	return packFloatx80( zSign, 0, 0 );
				4785	}
				4786	if ( bExp == 0 ) {
				4787	if ( bSig == 0 ) {
				4788	if ( ( aExp \| aSig ) == 0 ) {
				4789	invalid:
				4790	float_raise( float_flag_invalid STATUS_VAR);
				4791	z.low = floatx80_default_nan_low;
				4792	z.high = floatx80_default_nan_high;
				4793	return z;
				4794	}
				4795	float_raise( float_flag_divbyzero STATUS_VAR);
				4796	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4797	}
				4798	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4799	}
				4800	if ( aExp == 0 ) {
				4801	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				4802	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				4803	}
				4804	zExp = aExp - bExp + 0x3FFE;
				4805	rem1 = 0;
				4806	if ( bSig <= aSig ) {
				4807	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				4808	++zExp;
				4809	}
				4810	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				4811	mul64To128( bSig, zSig0, &term0, &term1 );
				4812	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4813	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4814	--zSig0;
				4815	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				4816	}
				4817	zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4818	if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4819	mul64To128( bSig, zSig1, &term1, &term2 );
				4820	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4821	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4822	--zSig1;
				4823	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				4824	}
				4825	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				4826	}
				4827	return
				4828	roundAndPackFloatx80(
				4829	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				4830
				4831	}
				4832
				4833	/*----------------------------------------------------------------------------
				4834	\| Returns the remainder of the extended double-precision floating-point value
				4835	\| `a' with respect to the corresponding value `b'. The operation is performed
				4836	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4837	----------------------------------------------------------------------------/
				4838
				4839	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				4840	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	4841	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4842	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4843	uint64_t aSig0, aSig1, bSig;
				4844	uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4845	floatx80 z;
				4846
				4847	aSig0 = extractFloatx80Frac( a );
				4848	aExp = extractFloatx80Exp( a );
				4849	aSign = extractFloatx80Sign( a );
				4850	bSig = extractFloatx80Frac( b );
				4851	bExp = extractFloatx80Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4852	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4853	if ( (uint64_t) ( aSig0<<1 )
				4854	\|\| ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4855	return propagateFloatx80NaN( a, b STATUS_VAR );
				4856	}
				4857	goto invalid;
				4858	}
				4859	if ( bExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4860	if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4861	return a;
				4862	}
				4863	if ( bExp == 0 ) {
				4864	if ( bSig == 0 ) {
				4865	invalid:
				4866	float_raise( float_flag_invalid STATUS_VAR);
				4867	z.low = floatx80_default_nan_low;
				4868	z.high = floatx80_default_nan_high;
				4869	return z;
				4870	}
				4871	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				4872	}
				4873	if ( aExp == 0 ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4874	if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4875	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				4876	}
				4877	bSig \|= LIT64( 0x8000000000000000 );
				4878	zSign = aSign;
				4879	expDiff = aExp - bExp;
				4880	aSig1 = 0;
				4881	if ( expDiff < 0 ) {
				4882	if ( expDiff < -1 ) return a;
				4883	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				4884	expDiff = 0;
				4885	}
				4886	q = ( bSig <= aSig0 );
				4887	if ( q ) aSig0 -= bSig;
				4888	expDiff -= 64;
				4889	while ( 0 < expDiff ) {
				4890	q = estimateDiv128To64( aSig0, aSig1, bSig );
				4891	q = ( 2 < q ) ? q - 2 : 0;
				4892	mul64To128( bSig, q, &term0, &term1 );
				4893	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4894	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				4895	expDiff -= 62;
				4896	}
				4897	expDiff += 64;
				4898	if ( 0 < expDiff ) {
				4899	q = estimateDiv128To64( aSig0, aSig1, bSig );
				4900	q = ( 2 < q ) ? q - 2 : 0;
				4901	q >>= 64 - expDiff;
				4902	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				4903	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4904	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				4905	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				4906	++q;
				4907	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				4908	}
				4909	}
				4910	else {
				4911	term1 = 0;
				4912	term0 = bSig;
				4913	}
				4914	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				4915	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				4916	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				4917	&& ( q & 1 ) )
				4918	) {
				4919	aSig0 = alternateASig0;
				4920	aSig1 = alternateASig1;
				4921	zSign = ! zSign;
				4922	}
				4923	return
				4924	normalizeRoundAndPackFloatx80(
				4925	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				4926
				4927	}
				4928
				4929	/*----------------------------------------------------------------------------
				4930	\| Returns the square root of the extended double-precision floating-point
				4931	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				4932	\| for Binary Floating-Point Arithmetic.
				4933	----------------------------------------------------------------------------/
				4934
				4935	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				4936	{
				4937	flag aSign;
				4938	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4939	uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				4940	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4941	floatx80 z;
				4942
				4943	aSig0 = extractFloatx80Frac( a );
				4944	aExp = extractFloatx80Exp( a );
				4945	aSign = extractFloatx80Sign( a );
				4946	if ( aExp == 0x7FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4947	if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4948	if ( ! aSign ) return a;
				4949	goto invalid;
				4950	}
				4951	if ( aSign ) {
				4952	if ( ( aExp \| aSig0 ) == 0 ) return a;
				4953	invalid:
				4954	float_raise( float_flag_invalid STATUS_VAR);
				4955	z.low = floatx80_default_nan_low;
				4956	z.high = floatx80_default_nan_high;
				4957	return z;
				4958	}
				4959	if ( aExp == 0 ) {
				4960	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				4961	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				4962	}
				4963	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				4964	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				4965	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				4966	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				4967	doubleZSig0 = zSig0<<1;
				4968	mul64To128( zSig0, zSig0, &term0, &term1 );
				4969	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4970	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4971	--zSig0;
				4972	doubleZSig0 -= 2;
				4973	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				4974	}
				4975	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				4976	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				4977	if ( zSig1 == 0 ) zSig1 = 1;
				4978	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				4979	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				4980	mul64To128( zSig1, zSig1, &term2, &term3 );
				4981	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	4982	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4983	--zSig1;
				4984	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				4985	term3 \|= 1;
				4986	term2 \|= doubleZSig0;
				4987	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				4988	}
				4989	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				4990	}
				4991	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				4992	zSig0 \|= doubleZSig0;
				4993	return
				4994	roundAndPackFloatx80(
				4995	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				4996
				4997	}
				4998
				4999	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5000	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				5001	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				5002	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5003	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5004	----------------------------------------------------------------------------/
				5005
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5006	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5007	{
				5008
				5009	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5010	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5011	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5012	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5013	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5014	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5015	return 0;
				5016	}
				5017	return
				5018	( a.low == b.low )
				5019	&& ( ( a.high == b.high )
				5020	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5021	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5022	);
				5023
				5024	}
				5025
				5026	/*----------------------------------------------------------------------------
				5027	\| Returns 1 if the extended double-precision floating-point value `a' is
				5028	\| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5029	\| invalid exception is raised if either operand is a NaN. The comparison is
				5030	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				5031	\| Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5032	----------------------------------------------------------------------------/
				5033
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5034	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5035	{
				5036	flag aSign, bSign;
				5037
				5038	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5039	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5040	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5041	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5042	) {
				5043	float_raise( float_flag_invalid STATUS_VAR);
				5044	return 0;
				5045	}
				5046	aSign = extractFloatx80Sign( a );
				5047	bSign = extractFloatx80Sign( b );
				5048	if ( aSign != bSign ) {
				5049	return
				5050	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5051	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5052	== 0 );
				5053	}
				5054	return
				5055	aSign ? le128( b.high, b.low, a.high, a.low )
				5056	: le128( a.high, a.low, b.high, b.low );
				5057
				5058	}
				5059
				5060	/*----------------------------------------------------------------------------
				5061	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5062	\| less than the corresponding value `b', and 0 otherwise. The invalid
				5063	\| exception is raised if either operand is a NaN. The comparison is performed
				5064	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5065	----------------------------------------------------------------------------/
				5066
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5067	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5068	{
				5069	flag aSign, bSign;
				5070
				5071	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5072	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5073	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5074	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5075	) {
				5076	float_raise( float_flag_invalid STATUS_VAR);
				5077	return 0;
				5078	}
				5079	aSign = extractFloatx80Sign( a );
				5080	bSign = extractFloatx80Sign( b );
				5081	if ( aSign != bSign ) {
				5082	return
				5083	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5084	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5085	!= 0 );
				5086	}
				5087	return
				5088	aSign ? lt128( b.high, b.low, a.high, a.low )
				5089	: lt128( a.high, a.low, b.high, b.low );
				5090
				5091	}
				5092
				5093	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5094	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5095	\| cannot be compared, and 0 otherwise. The invalid exception is raised if
				5096	\| either operand is a NaN. The comparison is performed according to the
				5097	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5098	----------------------------------------------------------------------------/
				5099	int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
				5100	{
				5101	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5102	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5103	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5104	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5105	) {
				5106	float_raise( float_flag_invalid STATUS_VAR);
				5107	return 1;
				5108	}
				5109	return 0;
				5110	}
				5111
				5112	/*----------------------------------------------------------------------------
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5113	\| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	5114	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5115	\| cause an exception. The comparison is performed according to the IEC/IEEE
				5116	\| Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5117	----------------------------------------------------------------------------/
				5118
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5119	int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5120	{
				5121
				5122	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5123	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5124	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5125	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5126	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	5127	if ( floatx80_is_signaling_nan( a )
				5128	\|\| floatx80_is_signaling_nan( b ) ) {
				5129	float_raise( float_flag_invalid STATUS_VAR);
				5130	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5131	return 0;
				5132	}
				5133	return
				5134	( a.low == b.low )
				5135	&& ( ( a.high == b.high )
				5136	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5137	&& ( (uint16_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5138	);
				5139
				5140	}
				5141
				5142	/*----------------------------------------------------------------------------
				5143	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5144	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				5145	\| do not cause an exception. Otherwise, the comparison is performed according
				5146	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5147	----------------------------------------------------------------------------/
				5148
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5149	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5150	{
				5151	flag aSign, bSign;
				5152
				5153	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5154	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5155	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5156	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5157	) {
				5158	if ( floatx80_is_signaling_nan( a )
				5159	\|\| floatx80_is_signaling_nan( b ) ) {
				5160	float_raise( float_flag_invalid STATUS_VAR);
				5161	}
				5162	return 0;
				5163	}
				5164	aSign = extractFloatx80Sign( a );
				5165	bSign = extractFloatx80Sign( b );
				5166	if ( aSign != bSign ) {
				5167	return
				5168	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5169	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5170	== 0 );
				5171	}
				5172	return
				5173	aSign ? le128( b.high, b.low, a.high, a.low )
				5174	: le128( a.high, a.low, b.high, b.low );
				5175
				5176	}
				5177
				5178	/*----------------------------------------------------------------------------
				5179	\| Returns 1 if the extended double-precision floating-point value `a' is less
				5180	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				5181	\| an exception. Otherwise, the comparison is performed according to the
				5182	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5183	----------------------------------------------------------------------------/
				5184
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5185	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5186	{
				5187	flag aSign, bSign;
				5188
				5189	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5190	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5191	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5192	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5193	) {
				5194	if ( floatx80_is_signaling_nan( a )
				5195	\|\| floatx80_is_signaling_nan( b ) ) {
				5196	float_raise( float_flag_invalid STATUS_VAR);
				5197	}
				5198	return 0;
				5199	}
				5200	aSign = extractFloatx80Sign( a );
				5201	bSign = extractFloatx80Sign( b );
				5202	if ( aSign != bSign ) {
				5203	return
				5204	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5205	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5206	!= 0 );
				5207	}
				5208	return
				5209	aSign ? lt128( b.high, b.low, a.high, a.low )
				5210	: lt128( a.high, a.low, b.high, b.low );
				5211
				5212	}
				5213
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	5214	/*----------------------------------------------------------------------------
				5215	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
				5216	\| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
				5217	\| The comparison is performed according to the IEC/IEEE Standard for Binary
				5218	\| Floating-Point Arithmetic.
				5219	----------------------------------------------------------------------------/
				5220	int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				5221	{
				5222	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				5223	&& (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
				5224	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				5225	&& (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
				5226	) {
				5227	if ( floatx80_is_signaling_nan( a )
				5228	\|\| floatx80_is_signaling_nan( b ) ) {
				5229	float_raise( float_flag_invalid STATUS_VAR);
				5230	}
				5231	return 1;
				5232	}
				5233	return 0;
				5234	}
				5235
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5236	/*----------------------------------------------------------------------------
				5237	\| Returns the result of converting the quadruple-precision floating-point
				5238	\| value `a' to the 32-bit two's complement integer format. The conversion
				5239	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5240	\| Arithmetic---which means in particular that the conversion is rounded
				5241	\| according to the current rounding mode. If `a' is a NaN, the largest
				5242	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5243	\| largest integer with the same sign as `a' is returned.
				5244	----------------------------------------------------------------------------/
				5245
				5246	int32 float128_to_int32( float128 a STATUS_PARAM )
				5247	{
				5248	flag aSign;
				5249	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5250	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5251
				5252	aSig1 = extractFloat128Frac1( a );
				5253	aSig0 = extractFloat128Frac0( a );
				5254	aExp = extractFloat128Exp( a );
				5255	aSign = extractFloat128Sign( a );
				5256	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				5257	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5258	aSig0 \|= ( aSig1 != 0 );
				5259	shiftCount = 0x4028 - aExp;
				5260	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				5261	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				5262
				5263	}
				5264
				5265	/*----------------------------------------------------------------------------
				5266	\| Returns the result of converting the quadruple-precision floating-point
				5267	\| value `a' to the 32-bit two's complement integer format. The conversion
				5268	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5269	\| Arithmetic, except that the conversion is always rounded toward zero. If
				5270	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				5271	\| conversion overflows, the largest integer with the same sign as `a' is
				5272	\| returned.
				5273	----------------------------------------------------------------------------/
				5274
				5275	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				5276	{
				5277	flag aSign;
				5278	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5279	uint64_t aSig0, aSig1, savedASig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5280	int32 z;
				5281
				5282	aSig1 = extractFloat128Frac1( a );
				5283	aSig0 = extractFloat128Frac0( a );
				5284	aExp = extractFloat128Exp( a );
				5285	aSign = extractFloat128Sign( a );
				5286	aSig0 \|= ( aSig1 != 0 );
				5287	if ( 0x401E < aExp ) {
				5288	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				5289	goto invalid;
				5290	}
				5291	else if ( aExp < 0x3FFF ) {
				5292	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5293	return 0;
				5294	}
				5295	aSig0 \|= LIT64( 0x0001000000000000 );
				5296	shiftCount = 0x402F - aExp;
				5297	savedASig = aSig0;
				5298	aSig0 >>= shiftCount;
				5299	z = aSig0;
				5300	if ( aSign ) z = - z;
				5301	if ( ( z < 0 ) ^ aSign ) {
				5302	invalid:
				5303	float_raise( float_flag_invalid STATUS_VAR);
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5304	return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5305	}
				5306	if ( ( aSig0<<shiftCount ) != savedASig ) {
				5307	STATUS(float_exception_flags) \|= float_flag_inexact;
				5308	}
				5309	return z;
				5310
				5311	}
				5312
				5313	/*----------------------------------------------------------------------------
				5314	\| Returns the result of converting the quadruple-precision floating-point
				5315	\| value `a' to the 64-bit two's complement integer format. The conversion
				5316	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5317	\| Arithmetic---which means in particular that the conversion is rounded
				5318	\| according to the current rounding mode. If `a' is a NaN, the largest
				5319	\| positive integer is returned. Otherwise, if the conversion overflows, the
				5320	\| largest integer with the same sign as `a' is returned.
				5321	----------------------------------------------------------------------------/
				5322
				5323	int64 float128_to_int64( float128 a STATUS_PARAM )
				5324	{
				5325	flag aSign;
				5326	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5327	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5328
				5329	aSig1 = extractFloat128Frac1( a );
				5330	aSig0 = extractFloat128Frac0( a );
				5331	aExp = extractFloat128Exp( a );
				5332	aSign = extractFloat128Sign( a );
				5333	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5334	shiftCount = 0x402F - aExp;
				5335	if ( shiftCount <= 0 ) {
				5336	if ( 0x403E < aExp ) {
				5337	float_raise( float_flag_invalid STATUS_VAR);
				5338	if ( ! aSign
				5339	\|\| ( ( aExp == 0x7FFF )
				5340	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				5341	)
				5342	) {
				5343	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5344	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5345	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5346	}
				5347	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				5348	}
				5349	else {
				5350	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				5351	}
				5352	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				5353
				5354	}
				5355
				5356	/*----------------------------------------------------------------------------
				5357	\| Returns the result of converting the quadruple-precision floating-point
				5358	\| value `a' to the 64-bit two's complement integer format. The conversion
				5359	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5360	\| Arithmetic, except that the conversion is always rounded toward zero.
				5361	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				5362	\| the conversion overflows, the largest integer with the same sign as `a' is
				5363	\| returned.
				5364	----------------------------------------------------------------------------/
				5365
				5366	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				5367	{
				5368	flag aSign;
				5369	int32 aExp, shiftCount;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5370	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5371	int64 z;
				5372
				5373	aSig1 = extractFloat128Frac1( a );
				5374	aSig0 = extractFloat128Frac0( a );
				5375	aExp = extractFloat128Exp( a );
				5376	aSign = extractFloat128Sign( a );
				5377	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				5378	shiftCount = aExp - 0x402F;
				5379	if ( 0 < shiftCount ) {
				5380	if ( 0x403E <= aExp ) {
				5381	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				5382	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				5383	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				5384	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				5385	}
				5386	else {
				5387	float_raise( float_flag_invalid STATUS_VAR);
				5388	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				5389	return LIT64( 0x7FFFFFFFFFFFFFFF );
				5390	}
				5391	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5392	return (int64_t) LIT64( 0x8000000000000000 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5393	}
				5394	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5395	if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5396	STATUS(float_exception_flags) \|= float_flag_inexact;
				5397	}
				5398	}
				5399	else {
				5400	if ( aExp < 0x3FFF ) {
				5401	if ( aExp \| aSig0 \| aSig1 ) {
				5402	STATUS(float_exception_flags) \|= float_flag_inexact;
				5403	}
				5404	return 0;
				5405	}
				5406	z = aSig0>>( - shiftCount );
				5407	if ( aSig1
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5408	\|\| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5409	STATUS(float_exception_flags) \|= float_flag_inexact;
				5410	}
				5411	}
				5412	if ( aSign ) z = - z;
				5413	return z;
				5414
				5415	}
				5416
				5417	/*----------------------------------------------------------------------------
				5418	\| Returns the result of converting the quadruple-precision floating-point
				5419	\| value `a' to the single-precision floating-point format. The conversion
				5420	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5421	\| Arithmetic.
				5422	----------------------------------------------------------------------------/
				5423
				5424	float32 float128_to_float32( float128 a STATUS_PARAM )
				5425	{
				5426	flag aSign;
				5427	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5428	uint64_t aSig0, aSig1;
				5429	uint32_t zSig;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5430
				5431	aSig1 = extractFloat128Frac1( a );
				5432	aSig0 = extractFloat128Frac0( a );
				5433	aExp = extractFloat128Exp( a );
				5434	aSign = extractFloat128Sign( a );
				5435	if ( aExp == 0x7FFF ) {
				5436	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5437	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5438	}
				5439	return packFloat32( aSign, 0xFF, 0 );
				5440	}
				5441	aSig0 \|= ( aSig1 != 0 );
				5442	shift64RightJamming( aSig0, 18, &aSig0 );
				5443	zSig = aSig0;
				5444	if ( aExp \|\| zSig ) {
				5445	zSig \|= 0x40000000;
				5446	aExp -= 0x3F81;
				5447	}
				5448	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				5449
				5450	}
				5451
				5452	/*----------------------------------------------------------------------------
				5453	\| Returns the result of converting the quadruple-precision floating-point
				5454	\| value `a' to the double-precision floating-point format. The conversion
				5455	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5456	\| Arithmetic.
				5457	----------------------------------------------------------------------------/
				5458
				5459	float64 float128_to_float64( float128 a STATUS_PARAM )
				5460	{
				5461	flag aSign;
				5462	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5463	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5464
				5465	aSig1 = extractFloat128Frac1( a );
				5466	aSig0 = extractFloat128Frac0( a );
				5467	aExp = extractFloat128Exp( a );
				5468	aSign = extractFloat128Sign( a );
				5469	if ( aExp == 0x7FFF ) {
				5470	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5471	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5472	}
				5473	return packFloat64( aSign, 0x7FF, 0 );
				5474	}
				5475	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				5476	aSig0 \|= ( aSig1 != 0 );
				5477	if ( aExp \|\| aSig0 ) {
				5478	aSig0 \|= LIT64( 0x4000000000000000 );
				5479	aExp -= 0x3C01;
				5480	}
				5481	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				5482
				5483	}
				5484
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5485	/*----------------------------------------------------------------------------
				5486	\| Returns the result of converting the quadruple-precision floating-point
				5487	\| value `a' to the extended double-precision floating-point format. The
				5488	\| conversion is performed according to the IEC/IEEE Standard for Binary
				5489	\| Floating-Point Arithmetic.
				5490	----------------------------------------------------------------------------/
				5491
				5492	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				5493	{
				5494	flag aSign;
				5495	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5496	uint64_t aSig0, aSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5497
				5498	aSig1 = extractFloat128Frac1( a );
				5499	aSig0 = extractFloat128Frac0( a );
				5500	aExp = extractFloat128Exp( a );
				5501	aSign = extractFloat128Sign( a );
				5502	if ( aExp == 0x7FFF ) {
				5503	if ( aSig0 \| aSig1 ) {
Christophe Lyon	bcd4d9a	2011-02-10 11:28:57 +0000	[diff] [blame]	5504	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5505	}
				5506	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				5507	}
				5508	if ( aExp == 0 ) {
				5509	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				5510	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5511	}
				5512	else {
				5513	aSig0 \|= LIT64( 0x0001000000000000 );
				5514	}
				5515	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				5516	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				5517
				5518	}
				5519
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5520	/*----------------------------------------------------------------------------
				5521	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				5522	\| returns the result as a quadruple-precision floating-point value. The
				5523	\| operation is performed according to the IEC/IEEE Standard for Binary
				5524	\| Floating-Point Arithmetic.
				5525	----------------------------------------------------------------------------/
				5526
				5527	float128 float128_round_to_int( float128 a STATUS_PARAM )
				5528	{
				5529	flag aSign;
				5530	int32 aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5531	uint64_t lastBitMask, roundBitsMask;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5532	int8 roundingMode;
				5533	float128 z;
				5534
				5535	aExp = extractFloat128Exp( a );
				5536	if ( 0x402F <= aExp ) {
				5537	if ( 0x406F <= aExp ) {
				5538	if ( ( aExp == 0x7FFF )
				5539	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				5540	) {
				5541	return propagateFloat128NaN( a, a STATUS_VAR );
				5542	}
				5543	return a;
				5544	}
				5545	lastBitMask = 1;
				5546	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				5547	roundBitsMask = lastBitMask - 1;
				5548	z = a;
				5549	roundingMode = STATUS(float_rounding_mode);
				5550	if ( roundingMode == float_round_nearest_even ) {
				5551	if ( lastBitMask ) {
				5552	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				5553	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				5554	}
				5555	else {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5556	if ( (int64_t) z.low < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5557	++z.high;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5558	if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5559	}
				5560	}
				5561	}
				5562	else if ( roundingMode != float_round_to_zero ) {
				5563	if ( extractFloat128Sign( z )
				5564	^ ( roundingMode == float_round_up ) ) {
				5565	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
				5566	}
				5567	}
				5568	z.low &= ~ roundBitsMask;
				5569	}
				5570	else {
				5571	if ( aExp < 0x3FFF ) {
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5572	if ( ( ( (uint64_t) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5573	STATUS(float_exception_flags) \|= float_flag_inexact;
				5574	aSign = extractFloat128Sign( a );
				5575	switch ( STATUS(float_rounding_mode) ) {
				5576	case float_round_nearest_even:
				5577	if ( ( aExp == 0x3FFE )
				5578	&& ( extractFloat128Frac0( a )
				5579	\| extractFloat128Frac1( a ) )
				5580	) {
				5581	return packFloat128( aSign, 0x3FFF, 0, 0 );
				5582	}
				5583	break;
				5584	case float_round_down:
				5585	return
				5586	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				5587	: packFloat128( 0, 0, 0, 0 );
				5588	case float_round_up:
				5589	return
				5590	aSign ? packFloat128( 1, 0, 0, 0 )
				5591	: packFloat128( 0, 0x3FFF, 0, 0 );
				5592	}
				5593	return packFloat128( aSign, 0, 0, 0 );
				5594	}
				5595	lastBitMask = 1;
				5596	lastBitMask <<= 0x402F - aExp;
				5597	roundBitsMask = lastBitMask - 1;
				5598	z.low = 0;
				5599	z.high = a.high;
				5600	roundingMode = STATUS(float_rounding_mode);
				5601	if ( roundingMode == float_round_nearest_even ) {
				5602	z.high += lastBitMask>>1;
				5603	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				5604	z.high &= ~ lastBitMask;
				5605	}
				5606	}
				5607	else if ( roundingMode != float_round_to_zero ) {
				5608	if ( extractFloat128Sign( z )
				5609	^ ( roundingMode == float_round_up ) ) {
				5610	z.high \|= ( a.low != 0 );
				5611	z.high += roundBitsMask;
				5612	}
				5613	}
				5614	z.high &= ~ roundBitsMask;
				5615	}
				5616	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				5617	STATUS(float_exception_flags) \|= float_flag_inexact;
				5618	}
				5619	return z;
				5620
				5621	}
				5622
				5623	/*----------------------------------------------------------------------------
				5624	\| Returns the result of adding the absolute values of the quadruple-precision
				5625	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				5626	\| before being returned. `zSign' is ignored if the result is a NaN.
				5627	\| The addition is performed according to the IEC/IEEE Standard for Binary
				5628	\| Floating-Point Arithmetic.
				5629	----------------------------------------------------------------------------/
				5630
				5631	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				5632	{
				5633	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5634	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5635	int32 expDiff;
				5636
				5637	aSig1 = extractFloat128Frac1( a );
				5638	aSig0 = extractFloat128Frac0( a );
				5639	aExp = extractFloat128Exp( a );
				5640	bSig1 = extractFloat128Frac1( b );
				5641	bSig0 = extractFloat128Frac0( b );
				5642	bExp = extractFloat128Exp( b );
				5643	expDiff = aExp - bExp;
				5644	if ( 0 < expDiff ) {
				5645	if ( aExp == 0x7FFF ) {
				5646	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5647	return a;
				5648	}
				5649	if ( bExp == 0 ) {
				5650	--expDiff;
				5651	}
				5652	else {
				5653	bSig0 \|= LIT64( 0x0001000000000000 );
				5654	}
				5655	shift128ExtraRightJamming(
				5656	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				5657	zExp = aExp;
				5658	}
				5659	else if ( expDiff < 0 ) {
				5660	if ( bExp == 0x7FFF ) {
				5661	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5662	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5663	}
				5664	if ( aExp == 0 ) {
				5665	++expDiff;
				5666	}
				5667	else {
				5668	aSig0 \|= LIT64( 0x0001000000000000 );
				5669	}
				5670	shift128ExtraRightJamming(
				5671	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				5672	zExp = bExp;
				5673	}
				5674	else {
				5675	if ( aExp == 0x7FFF ) {
				5676	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				5677	return propagateFloat128NaN( a, b STATUS_VAR );
				5678	}
				5679	return a;
				5680	}
				5681	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	5682	if ( aExp == 0 ) {
Peter Maydell	e6afc87	2011-05-19 14:46:17 +0100	[diff] [blame]	5683	if (STATUS(flush_to_zero)) {
				5684	if (zSig0 \| zSig1) {
				5685	float_raise(float_flag_output_denormal STATUS_VAR);
				5686	}
				5687	return packFloat128(zSign, 0, 0, 0);
				5688	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	5689	return packFloat128( zSign, 0, zSig0, zSig1 );
				5690	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5691	zSig2 = 0;
				5692	zSig0 \|= LIT64( 0x0002000000000000 );
				5693	zExp = aExp;
				5694	goto shiftRight1;
				5695	}
				5696	aSig0 \|= LIT64( 0x0001000000000000 );
				5697	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				5698	--zExp;
				5699	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				5700	++zExp;
				5701	shiftRight1:
				5702	shift128ExtraRightJamming(
				5703	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				5704	roundAndPack:
				5705	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5706
				5707	}
				5708
				5709	/*----------------------------------------------------------------------------
				5710	\| Returns the result of subtracting the absolute values of the quadruple-
				5711	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				5712	\| difference is negated before being returned. `zSign' is ignored if the
				5713	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				5714	\| Standard for Binary Floating-Point Arithmetic.
				5715	----------------------------------------------------------------------------/
				5716
				5717	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				5718	{
				5719	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5720	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5721	int32 expDiff;
				5722	float128 z;
				5723
				5724	aSig1 = extractFloat128Frac1( a );
				5725	aSig0 = extractFloat128Frac0( a );
				5726	aExp = extractFloat128Exp( a );
				5727	bSig1 = extractFloat128Frac1( b );
				5728	bSig0 = extractFloat128Frac0( b );
				5729	bExp = extractFloat128Exp( b );
				5730	expDiff = aExp - bExp;
				5731	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				5732	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				5733	if ( 0 < expDiff ) goto aExpBigger;
				5734	if ( expDiff < 0 ) goto bExpBigger;
				5735	if ( aExp == 0x7FFF ) {
				5736	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				5737	return propagateFloat128NaN( a, b STATUS_VAR );
				5738	}
				5739	float_raise( float_flag_invalid STATUS_VAR);
				5740	z.low = float128_default_nan_low;
				5741	z.high = float128_default_nan_high;
				5742	return z;
				5743	}
				5744	if ( aExp == 0 ) {
				5745	aExp = 1;
				5746	bExp = 1;
				5747	}
				5748	if ( bSig0 < aSig0 ) goto aBigger;
				5749	if ( aSig0 < bSig0 ) goto bBigger;
				5750	if ( bSig1 < aSig1 ) goto aBigger;
				5751	if ( aSig1 < bSig1 ) goto bBigger;
				5752	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				5753	bExpBigger:
				5754	if ( bExp == 0x7FFF ) {
				5755	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5756	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				5757	}
				5758	if ( aExp == 0 ) {
				5759	++expDiff;
				5760	}
				5761	else {
				5762	aSig0 \|= LIT64( 0x4000000000000000 );
				5763	}
				5764	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				5765	bSig0 \|= LIT64( 0x4000000000000000 );
				5766	bBigger:
				5767	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				5768	zExp = bExp;
				5769	zSign ^= 1;
				5770	goto normalizeRoundAndPack;
				5771	aExpBigger:
				5772	if ( aExp == 0x7FFF ) {
				5773	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5774	return a;
				5775	}
				5776	if ( bExp == 0 ) {
				5777	--expDiff;
				5778	}
				5779	else {
				5780	bSig0 \|= LIT64( 0x4000000000000000 );
				5781	}
				5782	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				5783	aSig0 \|= LIT64( 0x4000000000000000 );
				5784	aBigger:
				5785	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				5786	zExp = aExp;
				5787	normalizeRoundAndPack:
				5788	--zExp;
				5789	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				5790
				5791	}
				5792
				5793	/*----------------------------------------------------------------------------
				5794	\| Returns the result of adding the quadruple-precision floating-point values
				5795	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				5796	\| for Binary Floating-Point Arithmetic.
				5797	----------------------------------------------------------------------------/
				5798
				5799	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				5800	{
				5801	flag aSign, bSign;
				5802
				5803	aSign = extractFloat128Sign( a );
				5804	bSign = extractFloat128Sign( b );
				5805	if ( aSign == bSign ) {
				5806	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				5807	}
				5808	else {
				5809	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				5810	}
				5811
				5812	}
				5813
				5814	/*----------------------------------------------------------------------------
				5815	\| Returns the result of subtracting the quadruple-precision floating-point
				5816	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				5817	\| Standard for Binary Floating-Point Arithmetic.
				5818	----------------------------------------------------------------------------/
				5819
				5820	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				5821	{
				5822	flag aSign, bSign;
				5823
				5824	aSign = extractFloat128Sign( a );
				5825	bSign = extractFloat128Sign( b );
				5826	if ( aSign == bSign ) {
				5827	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				5828	}
				5829	else {
				5830	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				5831	}
				5832
				5833	}
				5834
				5835	/*----------------------------------------------------------------------------
				5836	\| Returns the result of multiplying the quadruple-precision floating-point
				5837	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				5838	\| Standard for Binary Floating-Point Arithmetic.
				5839	----------------------------------------------------------------------------/
				5840
				5841	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				5842	{
				5843	flag aSign, bSign, zSign;
				5844	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5845	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5846	float128 z;
				5847
				5848	aSig1 = extractFloat128Frac1( a );
				5849	aSig0 = extractFloat128Frac0( a );
				5850	aExp = extractFloat128Exp( a );
				5851	aSign = extractFloat128Sign( a );
				5852	bSig1 = extractFloat128Frac1( b );
				5853	bSig0 = extractFloat128Frac0( b );
				5854	bExp = extractFloat128Exp( b );
				5855	bSign = extractFloat128Sign( b );
				5856	zSign = aSign ^ bSign;
				5857	if ( aExp == 0x7FFF ) {
				5858	if ( ( aSig0 \| aSig1 )
				5859	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				5860	return propagateFloat128NaN( a, b STATUS_VAR );
				5861	}
				5862	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				5863	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5864	}
				5865	if ( bExp == 0x7FFF ) {
				5866	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5867	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				5868	invalid:
				5869	float_raise( float_flag_invalid STATUS_VAR);
				5870	z.low = float128_default_nan_low;
				5871	z.high = float128_default_nan_high;
				5872	return z;
				5873	}
				5874	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5875	}
				5876	if ( aExp == 0 ) {
				5877	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5878	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5879	}
				5880	if ( bExp == 0 ) {
				5881	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5882	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				5883	}
				5884	zExp = aExp + bExp - 0x4000;
				5885	aSig0 \|= LIT64( 0x0001000000000000 );
				5886	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				5887	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				5888	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				5889	zSig2 \|= ( zSig3 != 0 );
				5890	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				5891	shift128ExtraRightJamming(
				5892	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				5893	++zExp;
				5894	}
				5895	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5896
				5897	}
				5898
				5899	/*----------------------------------------------------------------------------
				5900	\| Returns the result of dividing the quadruple-precision floating-point value
				5901	\| `a' by the corresponding value `b'. The operation is performed according to
				5902	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5903	----------------------------------------------------------------------------/
				5904
				5905	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				5906	{
				5907	flag aSign, bSign, zSign;
				5908	int32 aExp, bExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5909	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				5910	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5911	float128 z;
				5912
				5913	aSig1 = extractFloat128Frac1( a );
				5914	aSig0 = extractFloat128Frac0( a );
				5915	aExp = extractFloat128Exp( a );
				5916	aSign = extractFloat128Sign( a );
				5917	bSig1 = extractFloat128Frac1( b );
				5918	bSig0 = extractFloat128Frac0( b );
				5919	bExp = extractFloat128Exp( b );
				5920	bSign = extractFloat128Sign( b );
				5921	zSign = aSign ^ bSign;
				5922	if ( aExp == 0x7FFF ) {
				5923	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5924	if ( bExp == 0x7FFF ) {
				5925	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5926	goto invalid;
				5927	}
				5928	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5929	}
				5930	if ( bExp == 0x7FFF ) {
				5931	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5932	return packFloat128( zSign, 0, 0, 0 );
				5933	}
				5934	if ( bExp == 0 ) {
				5935	if ( ( bSig0 \| bSig1 ) == 0 ) {
				5936	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				5937	invalid:
				5938	float_raise( float_flag_invalid STATUS_VAR);
				5939	z.low = float128_default_nan_low;
				5940	z.high = float128_default_nan_high;
				5941	return z;
				5942	}
				5943	float_raise( float_flag_divbyzero STATUS_VAR);
				5944	return packFloat128( zSign, 0x7FFF, 0, 0 );
				5945	}
				5946	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				5947	}
				5948	if ( aExp == 0 ) {
				5949	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				5950	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5951	}
				5952	zExp = aExp - bExp + 0x3FFD;
				5953	shortShift128Left(
				5954	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				5955	shortShift128Left(
				5956	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				5957	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				5958	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				5959	++zExp;
				5960	}
				5961	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				5962	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				5963	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5964	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5965	--zSig0;
				5966	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				5967	}
				5968	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				5969	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				5970	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				5971	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5972	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5973	--zSig1;
				5974	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				5975	}
				5976	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5977	}
				5978	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				5979	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5980
				5981	}
				5982
				5983	/*----------------------------------------------------------------------------
				5984	\| Returns the remainder of the quadruple-precision floating-point value `a'
				5985	\| with respect to the corresponding value `b'. The operation is performed
				5986	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5987	----------------------------------------------------------------------------/
				5988
				5989	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				5990	{
Blue Swirl	ed086f3	2010-03-07 13:49:58 +0000	[diff] [blame]	5991	flag aSign, zSign;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5992	int32 aExp, bExp, expDiff;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	5993	uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				5994	uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
				5995	int64_t sigMean0;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5996	float128 z;
				5997
				5998	aSig1 = extractFloat128Frac1( a );
				5999	aSig0 = extractFloat128Frac0( a );
				6000	aExp = extractFloat128Exp( a );
				6001	aSign = extractFloat128Sign( a );
				6002	bSig1 = extractFloat128Frac1( b );
				6003	bSig0 = extractFloat128Frac0( b );
				6004	bExp = extractFloat128Exp( b );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6005	if ( aExp == 0x7FFF ) {
				6006	if ( ( aSig0 \| aSig1 )
				6007	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				6008	return propagateFloat128NaN( a, b STATUS_VAR );
				6009	}
				6010	goto invalid;
				6011	}
				6012	if ( bExp == 0x7FFF ) {
				6013	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				6014	return a;
				6015	}
				6016	if ( bExp == 0 ) {
				6017	if ( ( bSig0 \| bSig1 ) == 0 ) {
				6018	invalid:
				6019	float_raise( float_flag_invalid STATUS_VAR);
				6020	z.low = float128_default_nan_low;
				6021	z.high = float128_default_nan_high;
				6022	return z;
				6023	}
				6024	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				6025	}
				6026	if ( aExp == 0 ) {
				6027	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				6028	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6029	}
				6030	expDiff = aExp - bExp;
				6031	if ( expDiff < -1 ) return a;
				6032	shortShift128Left(
				6033	aSig0 \| LIT64( 0x0001000000000000 ),
				6034	aSig1,
				6035	15 - ( expDiff < 0 ),
				6036	&aSig0,
				6037	&aSig1
				6038	);
				6039	shortShift128Left(
				6040	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				6041	q = le128( bSig0, bSig1, aSig0, aSig1 );
				6042	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				6043	expDiff -= 64;
				6044	while ( 0 < expDiff ) {
				6045	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6046	q = ( 4 < q ) ? q - 4 : 0;
				6047	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6048	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				6049	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				6050	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				6051	expDiff -= 61;
				6052	}
				6053	if ( -64 < expDiff ) {
				6054	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				6055	q = ( 4 < q ) ? q - 4 : 0;
				6056	q >>= - expDiff;
				6057	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6058	expDiff += 52;
				6059	if ( expDiff < 0 ) {
				6060	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				6061	}
				6062	else {
				6063	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				6064	}
				6065	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				6066	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				6067	}
				6068	else {
				6069	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				6070	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				6071	}
				6072	do {
				6073	alternateASig0 = aSig0;
				6074	alternateASig1 = aSig1;
				6075	++q;
				6076	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6077	} while ( 0 <= (int64_t) aSig0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6078	add128(
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6079	aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6080	if ( ( sigMean0 < 0 )
				6081	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				6082	aSig0 = alternateASig0;
				6083	aSig1 = alternateASig1;
				6084	}
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6085	zSign = ( (int64_t) aSig0 < 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6086	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				6087	return
				6088	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				6089
				6090	}
				6091
				6092	/*----------------------------------------------------------------------------
				6093	\| Returns the square root of the quadruple-precision floating-point value `a'.
				6094	\| The operation is performed according to the IEC/IEEE Standard for Binary
				6095	\| Floating-Point Arithmetic.
				6096	----------------------------------------------------------------------------/
				6097
				6098	float128 float128_sqrt( float128 a STATUS_PARAM )
				6099	{
				6100	flag aSign;
				6101	int32 aExp, zExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6102	uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				6103	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6104	float128 z;
				6105
				6106	aSig1 = extractFloat128Frac1( a );
				6107	aSig0 = extractFloat128Frac0( a );
				6108	aExp = extractFloat128Exp( a );
				6109	aSign = extractFloat128Sign( a );
				6110	if ( aExp == 0x7FFF ) {
				6111	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				6112	if ( ! aSign ) return a;
				6113	goto invalid;
				6114	}
				6115	if ( aSign ) {
				6116	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				6117	invalid:
				6118	float_raise( float_flag_invalid STATUS_VAR);
				6119	z.low = float128_default_nan_low;
				6120	z.high = float128_default_nan_high;
				6121	return z;
				6122	}
				6123	if ( aExp == 0 ) {
				6124	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				6125	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				6126	}
				6127	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				6128	aSig0 \|= LIT64( 0x0001000000000000 );
				6129	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				6130	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				6131	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				6132	doubleZSig0 = zSig0<<1;
				6133	mul64To128( zSig0, zSig0, &term0, &term1 );
				6134	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6135	while ( (int64_t) rem0 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6136	--zSig0;
				6137	doubleZSig0 -= 2;
				6138	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				6139	}
				6140	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				6141	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				6142	if ( zSig1 == 0 ) zSig1 = 1;
				6143	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				6144	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				6145	mul64To128( zSig1, zSig1, &term2, &term3 );
				6146	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6147	while ( (int64_t) rem1 < 0 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6148	--zSig1;
				6149	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				6150	term3 \|= 1;
				6151	term2 \|= doubleZSig0;
				6152	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				6153	}
				6154	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				6155	}
				6156	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				6157	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				6158
				6159	}
				6160
				6161	/*----------------------------------------------------------------------------
				6162	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6163	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6164	\| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6165	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6166	----------------------------------------------------------------------------/
				6167
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6168	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6169	{
				6170
				6171	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6172	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6173	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6174	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6175	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6176	float_raise( float_flag_invalid STATUS_VAR);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6177	return 0;
				6178	}
				6179	return
				6180	( a.low == b.low )
				6181	&& ( ( a.high == b.high )
				6182	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6183	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6184	);
				6185
				6186	}
				6187
				6188	/*----------------------------------------------------------------------------
				6189	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6190	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
				6191	\| exception is raised if either operand is a NaN. The comparison is performed
				6192	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6193	----------------------------------------------------------------------------/
				6194
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6195	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6196	{
				6197	flag aSign, bSign;
				6198
				6199	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6200	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6201	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6202	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6203	) {
				6204	float_raise( float_flag_invalid STATUS_VAR);
				6205	return 0;
				6206	}
				6207	aSign = extractFloat128Sign( a );
				6208	bSign = extractFloat128Sign( b );
				6209	if ( aSign != bSign ) {
				6210	return
				6211	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6212	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6213	== 0 );
				6214	}
				6215	return
				6216	aSign ? le128( b.high, b.low, a.high, a.low )
				6217	: le128( a.high, a.low, b.high, b.low );
				6218
				6219	}
				6220
				6221	/*----------------------------------------------------------------------------
				6222	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6223	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				6224	\| raised if either operand is a NaN. The comparison is performed according
				6225	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6226	----------------------------------------------------------------------------/
				6227
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6228	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6229	{
				6230	flag aSign, bSign;
				6231
				6232	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6233	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6234	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6235	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6236	) {
				6237	float_raise( float_flag_invalid STATUS_VAR);
				6238	return 0;
				6239	}
				6240	aSign = extractFloat128Sign( a );
				6241	bSign = extractFloat128Sign( b );
				6242	if ( aSign != bSign ) {
				6243	return
				6244	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6245	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6246	!= 0 );
				6247	}
				6248	return
				6249	aSign ? lt128( b.high, b.low, a.high, a.low )
				6250	: lt128( a.high, a.low, b.high, b.low );
				6251
				6252	}
				6253
				6254	/*----------------------------------------------------------------------------
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6255	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6256	\| be compared, and 0 otherwise. The invalid exception is raised if either
				6257	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
				6258	\| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6259	----------------------------------------------------------------------------/
				6260
				6261	int float128_unordered( float128 a, float128 b STATUS_PARAM )
				6262	{
				6263	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6264	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6265	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6266	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6267	) {
				6268	float_raise( float_flag_invalid STATUS_VAR);
				6269	return 1;
				6270	}
				6271	return 0;
				6272	}
				6273
				6274	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6275	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarno	f5a6425	2011-04-14 00:49:30 +0200	[diff] [blame]	6276	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6277	\| exception. The comparison is performed according to the IEC/IEEE Standard
				6278	\| for Binary Floating-Point Arithmetic.
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6279	----------------------------------------------------------------------------/
				6280
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6281	int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6282	{
				6283
				6284	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6285	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6286	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6287	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6288	) {
Aurelien Jarno	b689362	2011-04-14 00:49:29 +0200	[diff] [blame]	6289	if ( float128_is_signaling_nan( a )
				6290	\|\| float128_is_signaling_nan( b ) ) {
				6291	float_raise( float_flag_invalid STATUS_VAR);
				6292	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6293	return 0;
				6294	}
				6295	return
				6296	( a.low == b.low )
				6297	&& ( ( a.high == b.high )
				6298	\|\| ( ( a.low == 0 )
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6299	&& ( (uint64_t) ( ( a.high \| b.high )<<1 ) == 0 ) )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6300	);
				6301
				6302	}
				6303
				6304	/*----------------------------------------------------------------------------
				6305	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6306	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				6307	\| cause an exception. Otherwise, the comparison is performed according to the
				6308	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				6309	----------------------------------------------------------------------------/
				6310
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6311	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6312	{
				6313	flag aSign, bSign;
				6314
				6315	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6316	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6317	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6318	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6319	) {
				6320	if ( float128_is_signaling_nan( a )
				6321	\|\| float128_is_signaling_nan( b ) ) {
				6322	float_raise( float_flag_invalid STATUS_VAR);
				6323	}
				6324	return 0;
				6325	}
				6326	aSign = extractFloat128Sign( a );
				6327	bSign = extractFloat128Sign( b );
				6328	if ( aSign != bSign ) {
				6329	return
				6330	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6331	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6332	== 0 );
				6333	}
				6334	return
				6335	aSign ? le128( b.high, b.low, a.high, a.low )
				6336	: le128( a.high, a.low, b.high, b.low );
				6337
				6338	}
				6339
				6340	/*----------------------------------------------------------------------------
				6341	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				6342	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				6343	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				6344	\| Standard for Binary Floating-Point Arithmetic.
				6345	----------------------------------------------------------------------------/
				6346
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6347	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6348	{
				6349	flag aSign, bSign;
				6350
				6351	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6352	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6353	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6354	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6355	) {
				6356	if ( float128_is_signaling_nan( a )
				6357	\|\| float128_is_signaling_nan( b ) ) {
				6358	float_raise( float_flag_invalid STATUS_VAR);
				6359	}
				6360	return 0;
				6361	}
				6362	aSign = extractFloat128Sign( a );
				6363	bSign = extractFloat128Sign( b );
				6364	if ( aSign != bSign ) {
				6365	return
				6366	aSign
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6367	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	6368	!= 0 );
				6369	}
				6370	return
				6371	aSign ? lt128( b.high, b.low, a.high, a.low )
				6372	: lt128( a.high, a.low, b.high, b.low );
				6373
				6374	}
				6375
Aurelien Jarno	67b7861	2011-04-14 00:49:29 +0200	[diff] [blame]	6376	/*----------------------------------------------------------------------------
				6377	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
				6378	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
				6379	\| comparison is performed according to the IEC/IEEE Standard for Binary
				6380	\| Floating-Point Arithmetic.
				6381	----------------------------------------------------------------------------/
				6382
				6383	int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
				6384	{
				6385	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				6386	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				6387	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				6388	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				6389	) {
				6390	if ( float128_is_signaling_nan( a )
				6391	\|\| float128_is_signaling_nan( b ) ) {
				6392	float_raise( float_flag_invalid STATUS_VAR);
				6393	}
				6394	return 1;
				6395	}
				6396	return 0;
				6397	}
				6398
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6399	/* misc functions */
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6400	float32 uint32_to_float32( uint32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6401	{
				6402	return int64_to_float32(a STATUS_VAR);
				6403	}
				6404
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6405	float64 uint32_to_float64( uint32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6406	{
				6407	return int64_to_float64(a STATUS_VAR);
				6408	}
				6409
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6410	uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6411	{
				6412	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6413	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6414
				6415	v = float32_to_int64(a STATUS_VAR);
				6416	if (v < 0) {
				6417	res = 0;
				6418	float_raise( float_flag_invalid STATUS_VAR);
				6419	} else if (v > 0xffffffff) {
				6420	res = 0xffffffff;
				6421	float_raise( float_flag_invalid STATUS_VAR);
				6422	} else {
				6423	res = v;
				6424	}
				6425	return res;
				6426	}
				6427
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6428	uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6429	{
				6430	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6431	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6432
				6433	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6434	if (v < 0) {
				6435	res = 0;
				6436	float_raise( float_flag_invalid STATUS_VAR);
				6437	} else if (v > 0xffffffff) {
				6438	res = 0xffffffff;
				6439	float_raise( float_flag_invalid STATUS_VAR);
				6440	} else {
				6441	res = v;
				6442	}
				6443	return res;
				6444	}
				6445
Andreas Färber	38641f8	2011-08-28 20:24:33 +0200	[diff] [blame]	6446	uint16 float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6447	{
				6448	int64_t v;
Andreas Färber	38641f8	2011-08-28 20:24:33 +0200	[diff] [blame]	6449	uint16 res;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6450
				6451	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				6452	if (v < 0) {
				6453	res = 0;
				6454	float_raise( float_flag_invalid STATUS_VAR);
				6455	} else if (v > 0xffff) {
				6456	res = 0xffff;
				6457	float_raise( float_flag_invalid STATUS_VAR);
				6458	} else {
				6459	res = v;
				6460	}
				6461	return res;
				6462	}
				6463
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6464	uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6465	{
				6466	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6467	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6468
				6469	v = float64_to_int64(a STATUS_VAR);
				6470	if (v < 0) {
				6471	res = 0;
				6472	float_raise( float_flag_invalid STATUS_VAR);
				6473	} else if (v > 0xffffffff) {
				6474	res = 0xffffffff;
				6475	float_raise( float_flag_invalid STATUS_VAR);
				6476	} else {
				6477	res = v;
				6478	}
				6479	return res;
				6480	}
				6481
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6482	uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6483	{
				6484	int64_t v;
Andreas Färber	9f8d2a0	2011-08-28 20:24:34 +0200	[diff] [blame]	6485	uint32 res;
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6486
				6487	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				6488	if (v < 0) {
				6489	res = 0;
				6490	float_raise( float_flag_invalid STATUS_VAR);
				6491	} else if (v > 0xffffffff) {
				6492	res = 0xffffffff;
				6493	float_raise( float_flag_invalid STATUS_VAR);
				6494	} else {
				6495	res = v;
				6496	}
				6497	return res;
				6498	}
				6499
Andreas Färber	38641f8	2011-08-28 20:24:33 +0200	[diff] [blame]	6500	uint16 float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6501	{
				6502	int64_t v;
Andreas Färber	38641f8	2011-08-28 20:24:33 +0200	[diff] [blame]	6503	uint16 res;
Peter Maydell	cbcef45	2010-12-07 15:37:34 +0000	[diff] [blame]	6504
				6505	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				6506	if (v < 0) {
				6507	res = 0;
				6508	float_raise( float_flag_invalid STATUS_VAR);
				6509	} else if (v > 0xffff) {
				6510	res = 0xffff;
				6511	float_raise( float_flag_invalid STATUS_VAR);
				6512	} else {
				6513	res = v;
				6514	}
				6515	return res;
				6516	}
				6517
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6518	/* FIXME: This looks broken. */
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6519	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
				6520	{
				6521	int64_t v;
				6522
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6523	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				6524	v += float64_val(a);
				6525	v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6526
				6527	return v - INT64_MIN;
				6528	}
				6529
				6530	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				6531	{
				6532	int64_t v;
				6533
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6534	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				6535	v += float64_val(a);
				6536	v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	6537
				6538	return v - INT64_MIN;
				6539	}
				6540
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6541	#define COMPARE(s, nan_exp) \
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6542	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6543	int is_quiet STATUS_PARAM ) \
				6544	{ \
				6545	flag aSign, bSign; \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6546	uint ## s ## _t av, bv; \
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6547	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				6548	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6549	\
				6550	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				6551	extractFloat ## s ## Frac( a ) ) \|\| \
				6552	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				6553	extractFloat ## s ## Frac( b ) )) { \
				6554	if (!is_quiet \|\| \
				6555	float ## s ## _is_signaling_nan( a ) \|\| \
				6556	float ## s ## _is_signaling_nan( b ) ) { \
				6557	float_raise( float_flag_invalid STATUS_VAR); \
				6558	} \
				6559	return float_relation_unordered; \
				6560	} \
				6561	aSign = extractFloat ## s ## Sign( a ); \
				6562	bSign = extractFloat ## s ## Sign( b ); \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6563	av = float ## s ## _val(a); \
blueswir1	cd8a253	2007-11-21 18:57:44 +0000	[diff] [blame]	6564	bv = float ## s ## _val(b); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6565	if ( aSign != bSign ) { \
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6566	if ( (uint ## s ## _t) ( ( av \| bv )<<1 ) == 0 ) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6567	/* zero case */ \
				6568	return float_relation_equal; \
				6569	} else { \
				6570	return 1 - (2 * aSign); \
				6571	} \
				6572	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6573	if (av == bv) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6574	return float_relation_equal; \
				6575	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	6576	return 1 - 2 * (aSign ^ ( av < bv )); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6577	} \
				6578	} \
				6579	} \
				6580	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6581	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6582	{ \
				6583	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				6584	} \
				6585	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	6586	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	6587	{ \
				6588	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				6589	}
				6590
				6591	COMPARE(32, 0xff)
				6592	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6593
Aurelien Jarno	f6714d3	2011-04-20 13:04:22 +0200	[diff] [blame]	6594	INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
				6595	int is_quiet STATUS_PARAM )
				6596	{
				6597	flag aSign, bSign;
				6598
				6599	if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
				6600	( extractFloatx80Frac( a )<<1 ) ) \|\|
				6601	( ( extractFloatx80Exp( b ) == 0x7fff ) &&
				6602	( extractFloatx80Frac( b )<<1 ) )) {
				6603	if (!is_quiet \|\|
				6604	floatx80_is_signaling_nan( a ) \|\|
				6605	floatx80_is_signaling_nan( b ) ) {
				6606	float_raise( float_flag_invalid STATUS_VAR);
				6607	}
				6608	return float_relation_unordered;
				6609	}
				6610	aSign = extractFloatx80Sign( a );
				6611	bSign = extractFloatx80Sign( b );
				6612	if ( aSign != bSign ) {
				6613
				6614	if ( ( ( (uint16_t) ( ( a.high \| b.high ) << 1 ) ) == 0) &&
				6615	( ( a.low \| b.low ) == 0 ) ) {
				6616	/* zero case */
				6617	return float_relation_equal;
				6618	} else {
				6619	return 1 - (2 * aSign);
				6620	}
				6621	} else {
				6622	if (a.low == b.low && a.high == b.high) {
				6623	return float_relation_equal;
				6624	} else {
				6625	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				6626	}
				6627	}
				6628	}
				6629
				6630	int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
				6631	{
				6632	return floatx80_compare_internal(a, b, 0 STATUS_VAR);
				6633	}
				6634
				6635	int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
				6636	{
				6637	return floatx80_compare_internal(a, b, 1 STATUS_VAR);
				6638	}
				6639
blueswir1	1f58732	2007-11-25 18:40:20 +0000	[diff] [blame]	6640	INLINE int float128_compare_internal( float128 a, float128 b,
				6641	int is_quiet STATUS_PARAM )
				6642	{
				6643	flag aSign, bSign;
				6644
				6645	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
				6646	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
				6647	( ( extractFloat128Exp( b ) == 0x7fff ) &&
				6648	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
				6649	if (!is_quiet \|\|
				6650	float128_is_signaling_nan( a ) \|\|
				6651	float128_is_signaling_nan( b ) ) {
				6652	float_raise( float_flag_invalid STATUS_VAR);
				6653	}
				6654	return float_relation_unordered;
				6655	}
				6656	aSign = extractFloat128Sign( a );
				6657	bSign = extractFloat128Sign( b );
				6658	if ( aSign != bSign ) {
				6659	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
				6660	/* zero case */
				6661	return float_relation_equal;
				6662	} else {
				6663	return 1 - (2 * aSign);
				6664	}
				6665	} else {
				6666	if (a.low == b.low && a.high == b.high) {
				6667	return float_relation_equal;
				6668	} else {
				6669	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				6670	}
				6671	}
				6672	}
				6673
				6674	int float128_compare( float128 a, float128 b STATUS_PARAM )
				6675	{
				6676	return float128_compare_internal(a, b, 0 STATUS_VAR);
				6677	}
				6678
				6679	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
				6680	{
				6681	return float128_compare_internal(a, b, 1 STATUS_VAR);
				6682	}
				6683
Peter Maydell	274f1b0	2011-03-11 08:12:25 +0000	[diff] [blame]	6684	/* min() and max() functions. These can't be implemented as
				6685	* 'compare and pick one input' because that would mishandle
				6686	* NaNs and +0 vs -0.
				6687	*/
				6688	#define MINMAX(s, nan_exp) \
				6689	INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
				6690	int ismin STATUS_PARAM ) \
				6691	{ \
				6692	flag aSign, bSign; \
				6693	uint ## s ## _t av, bv; \
				6694	a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
				6695	b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
				6696	if (float ## s ## _is_any_nan(a) \|\| \
				6697	float ## s ## _is_any_nan(b)) { \
				6698	return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
				6699	} \
				6700	aSign = extractFloat ## s ## Sign(a); \
				6701	bSign = extractFloat ## s ## Sign(b); \
				6702	av = float ## s ## _val(a); \
				6703	bv = float ## s ## _val(b); \
				6704	if (aSign != bSign) { \
				6705	if (ismin) { \
				6706	return aSign ? a : b; \
				6707	} else { \
				6708	return aSign ? b : a; \
				6709	} \
				6710	} else { \
				6711	if (ismin) { \
				6712	return (aSign ^ (av < bv)) ? a : b; \
				6713	} else { \
				6714	return (aSign ^ (av < bv)) ? b : a; \
				6715	} \
				6716	} \
				6717	} \
				6718	\
				6719	float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
				6720	{ \
				6721	return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
				6722	} \
				6723	\
				6724	float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
				6725	{ \
				6726	return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
				6727	}
				6728
				6729	MINMAX(32, 0xff)
				6730	MINMAX(64, 0x7ff)
				6731
				6732
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6733	/* Multiply A by 2 raised to the power N. */
				6734	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				6735	{
				6736	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6737	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6738	uint32_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6739
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6740	a = float32_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6741	aSig = extractFloat32Frac( a );
				6742	aExp = extractFloat32Exp( a );
				6743	aSign = extractFloat32Sign( a );
				6744
				6745	if ( aExp == 0xFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6746	if ( aSig ) {
				6747	return propagateFloat32NaN( a, a STATUS_VAR );
				6748	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6749	return a;
				6750	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6751	if ( aExp != 0 )
				6752	aSig \|= 0x00800000;
				6753	else if ( aSig == 0 )
				6754	return a;
				6755
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6756	if (n > 0x200) {
				6757	n = 0x200;
				6758	} else if (n < -0x200) {
				6759	n = -0x200;
				6760	}
				6761
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6762	aExp += n - 1;
				6763	aSig <<= 7;
				6764	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6765	}
				6766
				6767	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				6768	{
				6769	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6770	int16_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6771	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6772
Peter Maydell	37d1866	2011-01-06 19:37:53 +0000	[diff] [blame]	6773	a = float64_squash_input_denormal(a STATUS_VAR);
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6774	aSig = extractFloat64Frac( a );
				6775	aExp = extractFloat64Exp( a );
				6776	aSign = extractFloat64Sign( a );
				6777
				6778	if ( aExp == 0x7FF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6779	if ( aSig ) {
				6780	return propagateFloat64NaN( a, a STATUS_VAR );
				6781	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6782	return a;
				6783	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6784	if ( aExp != 0 )
				6785	aSig \|= LIT64( 0x0010000000000000 );
				6786	else if ( aSig == 0 )
				6787	return a;
				6788
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6789	if (n > 0x1000) {
				6790	n = 0x1000;
				6791	} else if (n < -0x1000) {
				6792	n = -0x1000;
				6793	}
				6794
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6795	aExp += n - 1;
				6796	aSig <<= 10;
				6797	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6798	}
				6799
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6800	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				6801	{
				6802	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6803	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6804	uint64_t aSig;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6805
				6806	aSig = extractFloatx80Frac( a );
				6807	aExp = extractFloatx80Exp( a );
				6808	aSign = extractFloatx80Sign( a );
				6809
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6810	if ( aExp == 0x7FFF ) {
				6811	if ( aSig<<1 ) {
				6812	return propagateFloatx80NaN( a, a STATUS_VAR );
				6813	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6814	return a;
				6815	}
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6816
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6817	if (aExp == 0 && aSig == 0)
				6818	return a;
				6819
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6820	if (n > 0x10000) {
				6821	n = 0x10000;
				6822	} else if (n < -0x10000) {
				6823	n = -0x10000;
				6824	}
				6825
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6826	aExp += n;
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6827	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				6828	aSign, aExp, aSig, 0 STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6829	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6830
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6831	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				6832	{
				6833	flag aSign;
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6834	int32_t aExp;
Andreas Färber	bb98fe4	2011-03-07 01:34:06 +0100	[diff] [blame]	6835	uint64_t aSig0, aSig1;
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6836
				6837	aSig1 = extractFloat128Frac1( a );
				6838	aSig0 = extractFloat128Frac0( a );
				6839	aExp = extractFloat128Exp( a );
				6840	aSign = extractFloat128Sign( a );
				6841	if ( aExp == 0x7FFF ) {
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6842	if ( aSig0 \| aSig1 ) {
				6843	return propagateFloat128NaN( a, a STATUS_VAR );
				6844	}
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6845	return a;
				6846	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6847	if ( aExp != 0 )
				6848	aSig0 \|= LIT64( 0x0001000000000000 );
				6849	else if ( aSig0 == 0 && aSig1 == 0 )
				6850	return a;
				6851
Aurelien Jarno	326b9e9	2011-04-20 13:04:22 +0200	[diff] [blame]	6852	if (n > 0x10000) {
				6853	n = 0x10000;
				6854	} else if (n < -0x10000) {
				6855	n = -0x10000;
				6856	}
				6857
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	6858	aExp += n - 1;
				6859	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
				6860	STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	6861
				6862	}