Blame - fpu/softfloat.c - qemu

blob: 4d58744c87b6582363603c0162f19d18c60cd97e [file] [log] [blame]

bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1
				2	/*============================================================================
				3
				4	This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
				5	Package, Release 2b.
				6
				7	Written by John R. Hauser. This work was made possible in part by the
				8	International Computer Science Institute, located at Suite 600, 1947 Center
				9	Street, Berkeley, California 94704. Funding was partially provided by the
				10	National Science Foundation under grant MIP-9311980. The original version
				11	of this code was written as part of a project to build a fixed-point vector
				12	processor in collaboration with the University of California at Berkeley,
				13	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
				14	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
				15	arithmetic/SoftFloat.html'.
				16
				17	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
				18	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
				19	RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
				20	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
				21	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
				22	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
				23	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
				24	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
				25
				26	Derivative works are acceptable, even for commercial purposes, so long as
				27	(1) the source code for the derivative work includes prominent notice that
				28	the work is derivative, and (2) the source code includes prominent notice with
				29	these four paragraphs for those parts of this code that are retained.
				30
				31	=============================================================================*/
				32
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	33	/* FIXME: Flush-To-Zero only effects results. Denormal inputs should also
				34	be flushed to zero. */
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	35	#include "softfloat.h"
				36
				37	/*----------------------------------------------------------------------------
				38	\| Primitive arithmetic functions, including multi-word arithmetic, and
				39	\| division and square root approximations. (Can be specialized to target if
				40	\| desired.)
				41	----------------------------------------------------------------------------/
				42	#include "softfloat-macros.h"
				43
				44	/*----------------------------------------------------------------------------
				45	\| Functions and definitions to determine: (1) whether tininess for underflow
				46	\| is detected before or after rounding by default, (2) what (if anything)
				47	\| happens when exceptions are raised, (3) how signaling NaNs are distinguished
				48	\| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
				49	\| are propagated from function inputs to output. These details are target-
				50	\| specific.
				51	----------------------------------------------------------------------------/
				52	#include "softfloat-specialize.h"
				53
				54	void set_float_rounding_mode(int val STATUS_PARAM)
				55	{
				56	STATUS(float_rounding_mode) = val;
				57	}
				58
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	59	void set_float_exception_flags(int val STATUS_PARAM)
				60	{
				61	STATUS(float_exception_flags) = val;
				62	}
				63
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	64	#ifdef FLOATX80
				65	void set_floatx80_rounding_precision(int val STATUS_PARAM)
				66	{
				67	STATUS(floatx80_rounding_precision) = val;
				68	}
				69	#endif
				70
				71	/*----------------------------------------------------------------------------
				72	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
				73	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
				74	\| input. If `zSign' is 1, the input is negated before being converted to an
				75	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
				76	\| is simply rounded to an integer, with the inexact exception raised if the
				77	\| input cannot be represented exactly as an integer. However, if the fixed-
				78	\| point input is too large, the invalid exception is raised and the largest
				79	\| positive or negative integer is returned.
				80	----------------------------------------------------------------------------/
				81
				82	static int32 roundAndPackInt32( flag zSign, bits64 absZ STATUS_PARAM)
				83	{
				84	int8 roundingMode;
				85	flag roundNearestEven;
				86	int8 roundIncrement, roundBits;
				87	int32 z;
				88
				89	roundingMode = STATUS(float_rounding_mode);
				90	roundNearestEven = ( roundingMode == float_round_nearest_even );
				91	roundIncrement = 0x40;
				92	if ( ! roundNearestEven ) {
				93	if ( roundingMode == float_round_to_zero ) {
				94	roundIncrement = 0;
				95	}
				96	else {
				97	roundIncrement = 0x7F;
				98	if ( zSign ) {
				99	if ( roundingMode == float_round_up ) roundIncrement = 0;
				100	}
				101	else {
				102	if ( roundingMode == float_round_down ) roundIncrement = 0;
				103	}
				104	}
				105	}
				106	roundBits = absZ & 0x7F;
				107	absZ = ( absZ + roundIncrement )>>7;
				108	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				109	z = absZ;
				110	if ( zSign ) z = - z;
				111	if ( ( absZ>>32 ) \|\| ( z && ( ( z < 0 ) ^ zSign ) ) ) {
				112	float_raise( float_flag_invalid STATUS_VAR);
				113	return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				114	}
				115	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				116	return z;
				117
				118	}
				119
				120	/*----------------------------------------------------------------------------
				121	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
				122	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
				123	\| and returns the properly rounded 64-bit integer corresponding to the input.
				124	\| If `zSign' is 1, the input is negated before being converted to an integer.
				125	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
				126	\| the inexact exception raised if the input cannot be represented exactly as
				127	\| an integer. However, if the fixed-point input is too large, the invalid
				128	\| exception is raised and the largest positive or negative integer is
				129	\| returned.
				130	----------------------------------------------------------------------------/
				131
				132	static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 STATUS_PARAM)
				133	{
				134	int8 roundingMode;
				135	flag roundNearestEven, increment;
				136	int64 z;
				137
				138	roundingMode = STATUS(float_rounding_mode);
				139	roundNearestEven = ( roundingMode == float_round_nearest_even );
				140	increment = ( (sbits64) absZ1 < 0 );
				141	if ( ! roundNearestEven ) {
				142	if ( roundingMode == float_round_to_zero ) {
				143	increment = 0;
				144	}
				145	else {
				146	if ( zSign ) {
				147	increment = ( roundingMode == float_round_down ) && absZ1;
				148	}
				149	else {
				150	increment = ( roundingMode == float_round_up ) && absZ1;
				151	}
				152	}
				153	}
				154	if ( increment ) {
				155	++absZ0;
				156	if ( absZ0 == 0 ) goto overflow;
				157	absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
				158	}
				159	z = absZ0;
				160	if ( zSign ) z = - z;
				161	if ( z && ( ( z < 0 ) ^ zSign ) ) {
				162	overflow:
				163	float_raise( float_flag_invalid STATUS_VAR);
				164	return
				165	zSign ? (sbits64) LIT64( 0x8000000000000000 )
				166	: LIT64( 0x7FFFFFFFFFFFFFFF );
				167	}
				168	if ( absZ1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				169	return z;
				170
				171	}
				172
				173	/*----------------------------------------------------------------------------
				174	\| Returns the fraction bits of the single-precision floating-point value `a'.
				175	----------------------------------------------------------------------------/
				176
				177	INLINE bits32 extractFloat32Frac( float32 a )
				178	{
				179
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	180	return float32_val(a) & 0x007FFFFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	181
				182	}
				183
				184	/*----------------------------------------------------------------------------
				185	\| Returns the exponent bits of the single-precision floating-point value `a'.
				186	----------------------------------------------------------------------------/
				187
				188	INLINE int16 extractFloat32Exp( float32 a )
				189	{
				190
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	191	return ( float32_val(a)>>23 ) & 0xFF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	192
				193	}
				194
				195	/*----------------------------------------------------------------------------
				196	\| Returns the sign bit of the single-precision floating-point value `a'.
				197	----------------------------------------------------------------------------/
				198
				199	INLINE flag extractFloat32Sign( float32 a )
				200	{
				201
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	202	return float32_val(a)>>31;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	203
				204	}
				205
				206	/*----------------------------------------------------------------------------
				207	\| Normalizes the subnormal single-precision floating-point value represented
				208	\| by the denormalized significand `aSig'. The normalized exponent and
				209	\| significand are stored at the locations pointed to by `zExpPtr' and
				210	\| `zSigPtr', respectively.
				211	----------------------------------------------------------------------------/
				212
				213	static void
				214	normalizeFloat32Subnormal( bits32 aSig, int16 zExpPtr, bits32 zSigPtr )
				215	{
				216	int8 shiftCount;
				217
				218	shiftCount = countLeadingZeros32( aSig ) - 8;
				219	*zSigPtr = aSig<<shiftCount;
				220	*zExpPtr = 1 - shiftCount;
				221
				222	}
				223
				224	/*----------------------------------------------------------------------------
				225	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				226	\| single-precision floating-point value, returning the result. After being
				227	\| shifted into the proper positions, the three fields are simply added
				228	\| together to form the result. This means that any integer portion of `zSig'
				229	\| will be added into the exponent. Since a properly normalized significand
				230	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				231	\| than the desired result exponent whenever `zSig' is a complete, normalized
				232	\| significand.
				233	----------------------------------------------------------------------------/
				234
				235	INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
				236	{
				237
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	238	return make_float32(
				239	( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	240
				241	}
				242
				243	/*----------------------------------------------------------------------------
				244	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				245	\| and significand `zSig', and returns the proper single-precision floating-
				246	\| point value corresponding to the abstract input. Ordinarily, the abstract
				247	\| value is simply rounded and packed into the single-precision format, with
				248	\| the inexact exception raised if the abstract input cannot be represented
				249	\| exactly. However, if the abstract value is too large, the overflow and
				250	\| inexact exceptions are raised and an infinity or maximal finite value is
				251	\| returned. If the abstract value is too small, the input value is rounded to
				252	\| a subnormal number, and the underflow and inexact exceptions are raised if
				253	\| the abstract input cannot be represented exactly as a subnormal single-
				254	\| precision floating-point number.
				255	\| The input significand `zSig' has its binary point between bits 30
				256	\| and 29, which is 7 bits to the left of the usual location. This shifted
				257	\| significand must be normalized or smaller. If `zSig' is not normalized,
				258	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				259	\| and it must not require rounding. In the usual case that `zSig' is
				260	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				261	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				262	\| Binary Floating-Point Arithmetic.
				263	----------------------------------------------------------------------------/
				264
				265	static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				266	{
				267	int8 roundingMode;
				268	flag roundNearestEven;
				269	int8 roundIncrement, roundBits;
				270	flag isTiny;
				271
				272	roundingMode = STATUS(float_rounding_mode);
				273	roundNearestEven = ( roundingMode == float_round_nearest_even );
				274	roundIncrement = 0x40;
				275	if ( ! roundNearestEven ) {
				276	if ( roundingMode == float_round_to_zero ) {
				277	roundIncrement = 0;
				278	}
				279	else {
				280	roundIncrement = 0x7F;
				281	if ( zSign ) {
				282	if ( roundingMode == float_round_up ) roundIncrement = 0;
				283	}
				284	else {
				285	if ( roundingMode == float_round_down ) roundIncrement = 0;
				286	}
				287	}
				288	}
				289	roundBits = zSig & 0x7F;
				290	if ( 0xFD <= (bits16) zExp ) {
				291	if ( ( 0xFD < zExp )
				292	\|\| ( ( zExp == 0xFD )
				293	&& ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
				294	) {
				295	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	296	return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	297	}
				298	if ( zExp < 0 ) {
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	299	if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	300	isTiny =
				301	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				302	\|\| ( zExp < -1 )
				303	\|\| ( zSig + roundIncrement < 0x80000000 );
				304	shift32RightJamming( zSig, - zExp, &zSig );
				305	zExp = 0;
				306	roundBits = zSig & 0x7F;
				307	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				308	}
				309	}
				310	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				311	zSig = ( zSig + roundIncrement )>>7;
				312	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
				313	if ( zSig == 0 ) zExp = 0;
				314	return packFloat32( zSign, zExp, zSig );
				315
				316	}
				317
				318	/*----------------------------------------------------------------------------
				319	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				320	\| and significand `zSig', and returns the proper single-precision floating-
				321	\| point value corresponding to the abstract input. This routine is just like
				322	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
				323	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				324	\| floating-point exponent.
				325	----------------------------------------------------------------------------/
				326
				327	static float32
				328	normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig STATUS_PARAM)
				329	{
				330	int8 shiftCount;
				331
				332	shiftCount = countLeadingZeros32( zSig ) - 1;
				333	return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				334
				335	}
				336
				337	/*----------------------------------------------------------------------------
				338	\| Returns the fraction bits of the double-precision floating-point value `a'.
				339	----------------------------------------------------------------------------/
				340
				341	INLINE bits64 extractFloat64Frac( float64 a )
				342	{
				343
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	344	return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	345
				346	}
				347
				348	/*----------------------------------------------------------------------------
				349	\| Returns the exponent bits of the double-precision floating-point value `a'.
				350	----------------------------------------------------------------------------/
				351
				352	INLINE int16 extractFloat64Exp( float64 a )
				353	{
				354
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	355	return ( float64_val(a)>>52 ) & 0x7FF;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	356
				357	}
				358
				359	/*----------------------------------------------------------------------------
				360	\| Returns the sign bit of the double-precision floating-point value `a'.
				361	----------------------------------------------------------------------------/
				362
				363	INLINE flag extractFloat64Sign( float64 a )
				364	{
				365
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	366	return float64_val(a)>>63;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	367
				368	}
				369
				370	/*----------------------------------------------------------------------------
				371	\| Normalizes the subnormal double-precision floating-point value represented
				372	\| by the denormalized significand `aSig'. The normalized exponent and
				373	\| significand are stored at the locations pointed to by `zExpPtr' and
				374	\| `zSigPtr', respectively.
				375	----------------------------------------------------------------------------/
				376
				377	static void
				378	normalizeFloat64Subnormal( bits64 aSig, int16 zExpPtr, bits64 zSigPtr )
				379	{
				380	int8 shiftCount;
				381
				382	shiftCount = countLeadingZeros64( aSig ) - 11;
				383	*zSigPtr = aSig<<shiftCount;
				384	*zExpPtr = 1 - shiftCount;
				385
				386	}
				387
				388	/*----------------------------------------------------------------------------
				389	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
				390	\| double-precision floating-point value, returning the result. After being
				391	\| shifted into the proper positions, the three fields are simply added
				392	\| together to form the result. This means that any integer portion of `zSig'
				393	\| will be added into the exponent. Since a properly normalized significand
				394	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
				395	\| than the desired result exponent whenever `zSig' is a complete, normalized
				396	\| significand.
				397	----------------------------------------------------------------------------/
				398
				399	INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
				400	{
				401
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	402	return make_float64(
				403	( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	404
				405	}
				406
				407	/*----------------------------------------------------------------------------
				408	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				409	\| and significand `zSig', and returns the proper double-precision floating-
				410	\| point value corresponding to the abstract input. Ordinarily, the abstract
				411	\| value is simply rounded and packed into the double-precision format, with
				412	\| the inexact exception raised if the abstract input cannot be represented
				413	\| exactly. However, if the abstract value is too large, the overflow and
				414	\| inexact exceptions are raised and an infinity or maximal finite value is
				415	\| returned. If the abstract value is too small, the input value is rounded
				416	\| to a subnormal number, and the underflow and inexact exceptions are raised
				417	\| if the abstract input cannot be represented exactly as a subnormal double-
				418	\| precision floating-point number.
				419	\| The input significand `zSig' has its binary point between bits 62
				420	\| and 61, which is 10 bits to the left of the usual location. This shifted
				421	\| significand must be normalized or smaller. If `zSig' is not normalized,
				422	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
				423	\| and it must not require rounding. In the usual case that `zSig' is
				424	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
				425	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
				426	\| Binary Floating-Point Arithmetic.
				427	----------------------------------------------------------------------------/
				428
				429	static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				430	{
				431	int8 roundingMode;
				432	flag roundNearestEven;
				433	int16 roundIncrement, roundBits;
				434	flag isTiny;
				435
				436	roundingMode = STATUS(float_rounding_mode);
				437	roundNearestEven = ( roundingMode == float_round_nearest_even );
				438	roundIncrement = 0x200;
				439	if ( ! roundNearestEven ) {
				440	if ( roundingMode == float_round_to_zero ) {
				441	roundIncrement = 0;
				442	}
				443	else {
				444	roundIncrement = 0x3FF;
				445	if ( zSign ) {
				446	if ( roundingMode == float_round_up ) roundIncrement = 0;
				447	}
				448	else {
				449	if ( roundingMode == float_round_down ) roundIncrement = 0;
				450	}
				451	}
				452	}
				453	roundBits = zSig & 0x3FF;
				454	if ( 0x7FD <= (bits16) zExp ) {
				455	if ( ( 0x7FD < zExp )
				456	\|\| ( ( zExp == 0x7FD )
				457	&& ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
				458	) {
				459	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	460	return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	461	}
				462	if ( zExp < 0 ) {
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	463	if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	464	isTiny =
				465	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				466	\|\| ( zExp < -1 )
				467	\|\| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
				468	shift64RightJamming( zSig, - zExp, &zSig );
				469	zExp = 0;
				470	roundBits = zSig & 0x3FF;
				471	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				472	}
				473	}
				474	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				475	zSig = ( zSig + roundIncrement )>>10;
				476	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
				477	if ( zSig == 0 ) zExp = 0;
				478	return packFloat64( zSign, zExp, zSig );
				479
				480	}
				481
				482	/*----------------------------------------------------------------------------
				483	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				484	\| and significand `zSig', and returns the proper double-precision floating-
				485	\| point value corresponding to the abstract input. This routine is just like
				486	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
				487	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
				488	\| floating-point exponent.
				489	----------------------------------------------------------------------------/
				490
				491	static float64
				492	normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig STATUS_PARAM)
				493	{
				494	int8 shiftCount;
				495
				496	shiftCount = countLeadingZeros64( zSig ) - 1;
				497	return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
				498
				499	}
				500
				501	#ifdef FLOATX80
				502
				503	/*----------------------------------------------------------------------------
				504	\| Returns the fraction bits of the extended double-precision floating-point
				505	\| value `a'.
				506	----------------------------------------------------------------------------/
				507
				508	INLINE bits64 extractFloatx80Frac( floatx80 a )
				509	{
				510
				511	return a.low;
				512
				513	}
				514
				515	/*----------------------------------------------------------------------------
				516	\| Returns the exponent bits of the extended double-precision floating-point
				517	\| value `a'.
				518	----------------------------------------------------------------------------/
				519
				520	INLINE int32 extractFloatx80Exp( floatx80 a )
				521	{
				522
				523	return a.high & 0x7FFF;
				524
				525	}
				526
				527	/*----------------------------------------------------------------------------
				528	\| Returns the sign bit of the extended double-precision floating-point value
				529	\| `a'.
				530	----------------------------------------------------------------------------/
				531
				532	INLINE flag extractFloatx80Sign( floatx80 a )
				533	{
				534
				535	return a.high>>15;
				536
				537	}
				538
				539	/*----------------------------------------------------------------------------
				540	\| Normalizes the subnormal extended double-precision floating-point value
				541	\| represented by the denormalized significand `aSig'. The normalized exponent
				542	\| and significand are stored at the locations pointed to by `zExpPtr' and
				543	\| `zSigPtr', respectively.
				544	----------------------------------------------------------------------------/
				545
				546	static void
				547	normalizeFloatx80Subnormal( bits64 aSig, int32 zExpPtr, bits64 zSigPtr )
				548	{
				549	int8 shiftCount;
				550
				551	shiftCount = countLeadingZeros64( aSig );
				552	*zSigPtr = aSig<<shiftCount;
				553	*zExpPtr = 1 - shiftCount;
				554
				555	}
				556
				557	/*----------------------------------------------------------------------------
				558	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
				559	\| extended double-precision floating-point value, returning the result.
				560	----------------------------------------------------------------------------/
				561
				562	INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
				563	{
				564	floatx80 z;
				565
				566	z.low = zSig;
				567	z.high = ( ( (bits16) zSign )<<15 ) + zExp;
				568	return z;
				569
				570	}
				571
				572	/*----------------------------------------------------------------------------
				573	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				574	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
				575	\| and returns the proper extended double-precision floating-point value
				576	\| corresponding to the abstract input. Ordinarily, the abstract value is
				577	\| rounded and packed into the extended double-precision format, with the
				578	\| inexact exception raised if the abstract input cannot be represented
				579	\| exactly. However, if the abstract value is too large, the overflow and
				580	\| inexact exceptions are raised and an infinity or maximal finite value is
				581	\| returned. If the abstract value is too small, the input value is rounded to
				582	\| a subnormal number, and the underflow and inexact exceptions are raised if
				583	\| the abstract input cannot be represented exactly as a subnormal extended
				584	\| double-precision floating-point number.
				585	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
				586	\| number of bits as single or double precision, respectively. Otherwise, the
				587	\| result is rounded to the full precision of the extended double-precision
				588	\| format.
				589	\| The input significand must be normalized or smaller. If the input
				590	\| significand is not normalized, `zExp' must be 0; in that case, the result
				591	\| returned is a subnormal number, and it must not require rounding. The
				592	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
				593	\| Floating-Point Arithmetic.
				594	----------------------------------------------------------------------------/
				595
				596	static floatx80
				597	roundAndPackFloatx80(
				598	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				599	STATUS_PARAM)
				600	{
				601	int8 roundingMode;
				602	flag roundNearestEven, increment, isTiny;
				603	int64 roundIncrement, roundMask, roundBits;
				604
				605	roundingMode = STATUS(float_rounding_mode);
				606	roundNearestEven = ( roundingMode == float_round_nearest_even );
				607	if ( roundingPrecision == 80 ) goto precision80;
				608	if ( roundingPrecision == 64 ) {
				609	roundIncrement = LIT64( 0x0000000000000400 );
				610	roundMask = LIT64( 0x00000000000007FF );
				611	}
				612	else if ( roundingPrecision == 32 ) {
				613	roundIncrement = LIT64( 0x0000008000000000 );
				614	roundMask = LIT64( 0x000000FFFFFFFFFF );
				615	}
				616	else {
				617	goto precision80;
				618	}
				619	zSig0 \|= ( zSig1 != 0 );
				620	if ( ! roundNearestEven ) {
				621	if ( roundingMode == float_round_to_zero ) {
				622	roundIncrement = 0;
				623	}
				624	else {
				625	roundIncrement = roundMask;
				626	if ( zSign ) {
				627	if ( roundingMode == float_round_up ) roundIncrement = 0;
				628	}
				629	else {
				630	if ( roundingMode == float_round_down ) roundIncrement = 0;
				631	}
				632	}
				633	}
				634	roundBits = zSig0 & roundMask;
				635	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				636	if ( ( 0x7FFE < zExp )
				637	\|\| ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
				638	) {
				639	goto overflow;
				640	}
				641	if ( zExp <= 0 ) {
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	642	if ( STATUS(flush_to_zero) ) return packFloatx80( zSign, 0, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	643	isTiny =
				644	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				645	\|\| ( zExp < 0 )
				646	\|\| ( zSig0 <= zSig0 + roundIncrement );
				647	shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
				648	zExp = 0;
				649	roundBits = zSig0 & roundMask;
				650	if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
				651	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				652	zSig0 += roundIncrement;
				653	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				654	roundIncrement = roundMask + 1;
				655	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				656	roundMask \|= roundIncrement;
				657	}
				658	zSig0 &= ~ roundMask;
				659	return packFloatx80( zSign, zExp, zSig0 );
				660	}
				661	}
				662	if ( roundBits ) STATUS(float_exception_flags) \|= float_flag_inexact;
				663	zSig0 += roundIncrement;
				664	if ( zSig0 < roundIncrement ) {
				665	++zExp;
				666	zSig0 = LIT64( 0x8000000000000000 );
				667	}
				668	roundIncrement = roundMask + 1;
				669	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
				670	roundMask \|= roundIncrement;
				671	}
				672	zSig0 &= ~ roundMask;
				673	if ( zSig0 == 0 ) zExp = 0;
				674	return packFloatx80( zSign, zExp, zSig0 );
				675	precision80:
				676	increment = ( (sbits64) zSig1 < 0 );
				677	if ( ! roundNearestEven ) {
				678	if ( roundingMode == float_round_to_zero ) {
				679	increment = 0;
				680	}
				681	else {
				682	if ( zSign ) {
				683	increment = ( roundingMode == float_round_down ) && zSig1;
				684	}
				685	else {
				686	increment = ( roundingMode == float_round_up ) && zSig1;
				687	}
				688	}
				689	}
				690	if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
				691	if ( ( 0x7FFE < zExp )
				692	\|\| ( ( zExp == 0x7FFE )
				693	&& ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
				694	&& increment
				695	)
				696	) {
				697	roundMask = 0;
				698	overflow:
				699	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				700	if ( ( roundingMode == float_round_to_zero )
				701	\|\| ( zSign && ( roundingMode == float_round_up ) )
				702	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				703	) {
				704	return packFloatx80( zSign, 0x7FFE, ~ roundMask );
				705	}
				706	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				707	}
				708	if ( zExp <= 0 ) {
				709	isTiny =
				710	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				711	\|\| ( zExp < 0 )
				712	\|\| ! increment
				713	\|\| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
				714	shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
				715	zExp = 0;
				716	if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
				717	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				718	if ( roundNearestEven ) {
				719	increment = ( (sbits64) zSig1 < 0 );
				720	}
				721	else {
				722	if ( zSign ) {
				723	increment = ( roundingMode == float_round_down ) && zSig1;
				724	}
				725	else {
				726	increment = ( roundingMode == float_round_up ) && zSig1;
				727	}
				728	}
				729	if ( increment ) {
				730	++zSig0;
				731	zSig0 &=
				732	~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				733	if ( (sbits64) zSig0 < 0 ) zExp = 1;
				734	}
				735	return packFloatx80( zSign, zExp, zSig0 );
				736	}
				737	}
				738	if ( zSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				739	if ( increment ) {
				740	++zSig0;
				741	if ( zSig0 == 0 ) {
				742	++zExp;
				743	zSig0 = LIT64( 0x8000000000000000 );
				744	}
				745	else {
				746	zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
				747	}
				748	}
				749	else {
				750	if ( zSig0 == 0 ) zExp = 0;
				751	}
				752	return packFloatx80( zSign, zExp, zSig0 );
				753
				754	}
				755
				756	/*----------------------------------------------------------------------------
				757	\| Takes an abstract floating-point value having sign `zSign', exponent
				758	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
				759	\| and returns the proper extended double-precision floating-point value
				760	\| corresponding to the abstract input. This routine is just like
				761	\| `roundAndPackFloatx80' except that the input significand does not have to be
				762	\| normalized.
				763	----------------------------------------------------------------------------/
				764
				765	static floatx80
				766	normalizeRoundAndPackFloatx80(
				767	int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
				768	STATUS_PARAM)
				769	{
				770	int8 shiftCount;
				771
				772	if ( zSig0 == 0 ) {
				773	zSig0 = zSig1;
				774	zSig1 = 0;
				775	zExp -= 64;
				776	}
				777	shiftCount = countLeadingZeros64( zSig0 );
				778	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				779	zExp -= shiftCount;
				780	return
				781	roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
				782
				783	}
				784
				785	#endif
				786
				787	#ifdef FLOAT128
				788
				789	/*----------------------------------------------------------------------------
				790	\| Returns the least-significant 64 fraction bits of the quadruple-precision
				791	\| floating-point value `a'.
				792	----------------------------------------------------------------------------/
				793
				794	INLINE bits64 extractFloat128Frac1( float128 a )
				795	{
				796
				797	return a.low;
				798
				799	}
				800
				801	/*----------------------------------------------------------------------------
				802	\| Returns the most-significant 48 fraction bits of the quadruple-precision
				803	\| floating-point value `a'.
				804	----------------------------------------------------------------------------/
				805
				806	INLINE bits64 extractFloat128Frac0( float128 a )
				807	{
				808
				809	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
				810
				811	}
				812
				813	/*----------------------------------------------------------------------------
				814	\| Returns the exponent bits of the quadruple-precision floating-point value
				815	\| `a'.
				816	----------------------------------------------------------------------------/
				817
				818	INLINE int32 extractFloat128Exp( float128 a )
				819	{
				820
				821	return ( a.high>>48 ) & 0x7FFF;
				822
				823	}
				824
				825	/*----------------------------------------------------------------------------
				826	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
				827	----------------------------------------------------------------------------/
				828
				829	INLINE flag extractFloat128Sign( float128 a )
				830	{
				831
				832	return a.high>>63;
				833
				834	}
				835
				836	/*----------------------------------------------------------------------------
				837	\| Normalizes the subnormal quadruple-precision floating-point value
				838	\| represented by the denormalized significand formed by the concatenation of
				839	\| `aSig0' and `aSig1'. The normalized exponent is stored at the location
				840	\| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
				841	\| significand are stored at the location pointed to by `zSig0Ptr', and the
				842	\| least significant 64 bits of the normalized significand are stored at the
				843	\| location pointed to by `zSig1Ptr'.
				844	----------------------------------------------------------------------------/
				845
				846	static void
				847	normalizeFloat128Subnormal(
				848	bits64 aSig0,
				849	bits64 aSig1,
				850	int32 *zExpPtr,
				851	bits64 *zSig0Ptr,
				852	bits64 *zSig1Ptr
				853	)
				854	{
				855	int8 shiftCount;
				856
				857	if ( aSig0 == 0 ) {
				858	shiftCount = countLeadingZeros64( aSig1 ) - 15;
				859	if ( shiftCount < 0 ) {
				860	*zSig0Ptr = aSig1>>( - shiftCount );
				861	*zSig1Ptr = aSig1<<( shiftCount & 63 );
				862	}
				863	else {
				864	*zSig0Ptr = aSig1<<shiftCount;
				865	*zSig1Ptr = 0;
				866	}
				867	*zExpPtr = - shiftCount - 63;
				868	}
				869	else {
				870	shiftCount = countLeadingZeros64( aSig0 ) - 15;
				871	shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
				872	*zExpPtr = 1 - shiftCount;
				873	}
				874
				875	}
				876
				877	/*----------------------------------------------------------------------------
				878	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
				879	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
				880	\| floating-point value, returning the result. After being shifted into the
				881	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
				882	\| added together to form the most significant 32 bits of the result. This
				883	\| means that any integer portion of `zSig0' will be added into the exponent.
				884	\| Since a properly normalized significand will have an integer portion equal
				885	\| to 1, the `zExp' input should be 1 less than the desired result exponent
				886	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
				887	\| significand.
				888	----------------------------------------------------------------------------/
				889
				890	INLINE float128
				891	packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
				892	{
				893	float128 z;
				894
				895	z.low = zSig1;
				896	z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
				897	return z;
				898
				899	}
				900
				901	/*----------------------------------------------------------------------------
				902	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				903	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
				904	\| and `zSig2', and returns the proper quadruple-precision floating-point value
				905	\| corresponding to the abstract input. Ordinarily, the abstract value is
				906	\| simply rounded and packed into the quadruple-precision format, with the
				907	\| inexact exception raised if the abstract input cannot be represented
				908	\| exactly. However, if the abstract value is too large, the overflow and
				909	\| inexact exceptions are raised and an infinity or maximal finite value is
				910	\| returned. If the abstract value is too small, the input value is rounded to
				911	\| a subnormal number, and the underflow and inexact exceptions are raised if
				912	\| the abstract input cannot be represented exactly as a subnormal quadruple-
				913	\| precision floating-point number.
				914	\| The input significand must be normalized or smaller. If the input
				915	\| significand is not normalized, `zExp' must be 0; in that case, the result
				916	\| returned is a subnormal number, and it must not require rounding. In the
				917	\| usual case that the input significand is normalized, `zExp' must be 1 less
				918	\| than the ``true'' floating-point exponent. The handling of underflow and
				919	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				920	----------------------------------------------------------------------------/
				921
				922	static float128
				923	roundAndPackFloat128(
				924	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM)
				925	{
				926	int8 roundingMode;
				927	flag roundNearestEven, increment, isTiny;
				928
				929	roundingMode = STATUS(float_rounding_mode);
				930	roundNearestEven = ( roundingMode == float_round_nearest_even );
				931	increment = ( (sbits64) zSig2 < 0 );
				932	if ( ! roundNearestEven ) {
				933	if ( roundingMode == float_round_to_zero ) {
				934	increment = 0;
				935	}
				936	else {
				937	if ( zSign ) {
				938	increment = ( roundingMode == float_round_down ) && zSig2;
				939	}
				940	else {
				941	increment = ( roundingMode == float_round_up ) && zSig2;
				942	}
				943	}
				944	}
				945	if ( 0x7FFD <= (bits32) zExp ) {
				946	if ( ( 0x7FFD < zExp )
				947	\|\| ( ( zExp == 0x7FFD )
				948	&& eq128(
				949	LIT64( 0x0001FFFFFFFFFFFF ),
				950	LIT64( 0xFFFFFFFFFFFFFFFF ),
				951	zSig0,
				952	zSig1
				953	)
				954	&& increment
				955	)
				956	) {
				957	float_raise( float_flag_overflow \| float_flag_inexact STATUS_VAR);
				958	if ( ( roundingMode == float_round_to_zero )
				959	\|\| ( zSign && ( roundingMode == float_round_up ) )
				960	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
				961	) {
				962	return
				963	packFloat128(
				964	zSign,
				965	0x7FFE,
				966	LIT64( 0x0000FFFFFFFFFFFF ),
				967	LIT64( 0xFFFFFFFFFFFFFFFF )
				968	);
				969	}
				970	return packFloat128( zSign, 0x7FFF, 0, 0 );
				971	}
				972	if ( zExp < 0 ) {
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	973	if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	974	isTiny =
				975	( STATUS(float_detect_tininess) == float_tininess_before_rounding )
				976	\|\| ( zExp < -1 )
				977	\|\| ! increment
				978	\|\| lt128(
				979	zSig0,
				980	zSig1,
				981	LIT64( 0x0001FFFFFFFFFFFF ),
				982	LIT64( 0xFFFFFFFFFFFFFFFF )
				983	);
				984	shift128ExtraRightJamming(
				985	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
				986	zExp = 0;
				987	if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
				988	if ( roundNearestEven ) {
				989	increment = ( (sbits64) zSig2 < 0 );
				990	}
				991	else {
				992	if ( zSign ) {
				993	increment = ( roundingMode == float_round_down ) && zSig2;
				994	}
				995	else {
				996	increment = ( roundingMode == float_round_up ) && zSig2;
				997	}
				998	}
				999	}
				1000	}
				1001	if ( zSig2 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1002	if ( increment ) {
				1003	add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
				1004	zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
				1005	}
				1006	else {
				1007	if ( ( zSig0 \| zSig1 ) == 0 ) zExp = 0;
				1008	}
				1009	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1010
				1011	}
				1012
				1013	/*----------------------------------------------------------------------------
				1014	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
				1015	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
				1016	\| returns the proper quadruple-precision floating-point value corresponding
				1017	\| to the abstract input. This routine is just like `roundAndPackFloat128'
				1018	\| except that the input significand has fewer bits and does not have to be
				1019	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
				1020	\| point exponent.
				1021	----------------------------------------------------------------------------/
				1022
				1023	static float128
				1024	normalizeRoundAndPackFloat128(
				1025	flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM)
				1026	{
				1027	int8 shiftCount;
				1028	bits64 zSig2;
				1029
				1030	if ( zSig0 == 0 ) {
				1031	zSig0 = zSig1;
				1032	zSig1 = 0;
				1033	zExp -= 64;
				1034	}
				1035	shiftCount = countLeadingZeros64( zSig0 ) - 15;
				1036	if ( 0 <= shiftCount ) {
				1037	zSig2 = 0;
				1038	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1039	}
				1040	else {
				1041	shift128ExtraRightJamming(
				1042	zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
				1043	}
				1044	zExp -= shiftCount;
				1045	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
				1046
				1047	}
				1048
				1049	#endif
				1050
				1051	/*----------------------------------------------------------------------------
				1052	\| Returns the result of converting the 32-bit two's complement integer `a'
				1053	\| to the single-precision floating-point format. The conversion is performed
				1054	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1055	----------------------------------------------------------------------------/
				1056
				1057	float32 int32_to_float32( int32 a STATUS_PARAM )
				1058	{
				1059	flag zSign;
				1060
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1061	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1062	if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
				1063	zSign = ( a < 0 );
				1064	return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
				1065
				1066	}
				1067
				1068	/*----------------------------------------------------------------------------
				1069	\| Returns the result of converting the 32-bit two's complement integer `a'
				1070	\| to the double-precision floating-point format. The conversion is performed
				1071	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1072	----------------------------------------------------------------------------/
				1073
				1074	float64 int32_to_float64( int32 a STATUS_PARAM )
				1075	{
				1076	flag zSign;
				1077	uint32 absA;
				1078	int8 shiftCount;
				1079	bits64 zSig;
				1080
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1081	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1082	zSign = ( a < 0 );
				1083	absA = zSign ? - a : a;
				1084	shiftCount = countLeadingZeros32( absA ) + 21;
				1085	zSig = absA;
				1086	return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
				1087
				1088	}
				1089
				1090	#ifdef FLOATX80
				1091
				1092	/*----------------------------------------------------------------------------
				1093	\| Returns the result of converting the 32-bit two's complement integer `a'
				1094	\| to the extended double-precision floating-point format. The conversion
				1095	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1096	\| Arithmetic.
				1097	----------------------------------------------------------------------------/
				1098
				1099	floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
				1100	{
				1101	flag zSign;
				1102	uint32 absA;
				1103	int8 shiftCount;
				1104	bits64 zSig;
				1105
				1106	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1107	zSign = ( a < 0 );
				1108	absA = zSign ? - a : a;
				1109	shiftCount = countLeadingZeros32( absA ) + 32;
				1110	zSig = absA;
				1111	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
				1112
				1113	}
				1114
				1115	#endif
				1116
				1117	#ifdef FLOAT128
				1118
				1119	/*----------------------------------------------------------------------------
				1120	\| Returns the result of converting the 32-bit two's complement integer `a' to
				1121	\| the quadruple-precision floating-point format. The conversion is performed
				1122	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1123	----------------------------------------------------------------------------/
				1124
				1125	float128 int32_to_float128( int32 a STATUS_PARAM )
				1126	{
				1127	flag zSign;
				1128	uint32 absA;
				1129	int8 shiftCount;
				1130	bits64 zSig0;
				1131
				1132	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1133	zSign = ( a < 0 );
				1134	absA = zSign ? - a : a;
				1135	shiftCount = countLeadingZeros32( absA ) + 17;
				1136	zSig0 = absA;
				1137	return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
				1138
				1139	}
				1140
				1141	#endif
				1142
				1143	/*----------------------------------------------------------------------------
				1144	\| Returns the result of converting the 64-bit two's complement integer `a'
				1145	\| to the single-precision floating-point format. The conversion is performed
				1146	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1147	----------------------------------------------------------------------------/
				1148
				1149	float32 int64_to_float32( int64 a STATUS_PARAM )
				1150	{
				1151	flag zSign;
				1152	uint64 absA;
				1153	int8 shiftCount;
				1154
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1155	if ( a == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1156	zSign = ( a < 0 );
				1157	absA = zSign ? - a : a;
				1158	shiftCount = countLeadingZeros64( absA ) - 40;
				1159	if ( 0 <= shiftCount ) {
				1160	return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
				1161	}
				1162	else {
				1163	shiftCount += 7;
				1164	if ( shiftCount < 0 ) {
				1165	shift64RightJamming( absA, - shiftCount, &absA );
				1166	}
				1167	else {
				1168	absA <<= shiftCount;
				1169	}
				1170	return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
				1171	}
				1172
				1173	}
				1174
j_mayer	3430b0b	2007-03-20 22:25:37 +0000	[diff] [blame]	1175	float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1176	{
				1177	int8 shiftCount;
				1178
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1179	if ( a == 0 ) return float32_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1180	shiftCount = countLeadingZeros64( a ) - 40;
				1181	if ( 0 <= shiftCount ) {
				1182	return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
				1183	}
				1184	else {
				1185	shiftCount += 7;
				1186	if ( shiftCount < 0 ) {
				1187	shift64RightJamming( a, - shiftCount, &a );
				1188	}
				1189	else {
				1190	a <<= shiftCount;
				1191	}
				1192	return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
				1193	}
				1194	}
				1195
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1196	/*----------------------------------------------------------------------------
				1197	\| Returns the result of converting the 64-bit two's complement integer `a'
				1198	\| to the double-precision floating-point format. The conversion is performed
				1199	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1200	----------------------------------------------------------------------------/
				1201
				1202	float64 int64_to_float64( int64 a STATUS_PARAM )
				1203	{
				1204	flag zSign;
				1205
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1206	if ( a == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1207	if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
				1208	return packFloat64( 1, 0x43E, 0 );
				1209	}
				1210	zSign = ( a < 0 );
				1211	return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
				1212
				1213	}
				1214
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1215	float64 uint64_to_float64( uint64 a STATUS_PARAM )
				1216	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1217	if ( a == 0 ) return float64_zero;
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	1218	return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
				1219
				1220	}
				1221
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1222	#ifdef FLOATX80
				1223
				1224	/*----------------------------------------------------------------------------
				1225	\| Returns the result of converting the 64-bit two's complement integer `a'
				1226	\| to the extended double-precision floating-point format. The conversion
				1227	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1228	\| Arithmetic.
				1229	----------------------------------------------------------------------------/
				1230
				1231	floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
				1232	{
				1233	flag zSign;
				1234	uint64 absA;
				1235	int8 shiftCount;
				1236
				1237	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
				1238	zSign = ( a < 0 );
				1239	absA = zSign ? - a : a;
				1240	shiftCount = countLeadingZeros64( absA );
				1241	return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
				1242
				1243	}
				1244
				1245	#endif
				1246
				1247	#ifdef FLOAT128
				1248
				1249	/*----------------------------------------------------------------------------
				1250	\| Returns the result of converting the 64-bit two's complement integer `a' to
				1251	\| the quadruple-precision floating-point format. The conversion is performed
				1252	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1253	----------------------------------------------------------------------------/
				1254
				1255	float128 int64_to_float128( int64 a STATUS_PARAM )
				1256	{
				1257	flag zSign;
				1258	uint64 absA;
				1259	int8 shiftCount;
				1260	int32 zExp;
				1261	bits64 zSig0, zSig1;
				1262
				1263	if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
				1264	zSign = ( a < 0 );
				1265	absA = zSign ? - a : a;
				1266	shiftCount = countLeadingZeros64( absA ) + 49;
				1267	zExp = 0x406E - shiftCount;
				1268	if ( 64 <= shiftCount ) {
				1269	zSig1 = 0;
				1270	zSig0 = absA;
				1271	shiftCount -= 64;
				1272	}
				1273	else {
				1274	zSig1 = absA;
				1275	zSig0 = 0;
				1276	}
				1277	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
				1278	return packFloat128( zSign, zExp, zSig0, zSig1 );
				1279
				1280	}
				1281
				1282	#endif
				1283
				1284	/*----------------------------------------------------------------------------
				1285	\| Returns the result of converting the single-precision floating-point value
				1286	\| `a' to the 32-bit two's complement integer format. The conversion is
				1287	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1288	\| Arithmetic---which means in particular that the conversion is rounded
				1289	\| according to the current rounding mode. If `a' is a NaN, the largest
				1290	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1291	\| largest integer with the same sign as `a' is returned.
				1292	----------------------------------------------------------------------------/
				1293
				1294	int32 float32_to_int32( float32 a STATUS_PARAM )
				1295	{
				1296	flag aSign;
				1297	int16 aExp, shiftCount;
				1298	bits32 aSig;
				1299	bits64 aSig64;
				1300
				1301	aSig = extractFloat32Frac( a );
				1302	aExp = extractFloat32Exp( a );
				1303	aSign = extractFloat32Sign( a );
				1304	if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
				1305	if ( aExp ) aSig \|= 0x00800000;
				1306	shiftCount = 0xAF - aExp;
				1307	aSig64 = aSig;
				1308	aSig64 <<= 32;
				1309	if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
				1310	return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
				1311
				1312	}
				1313
				1314	/*----------------------------------------------------------------------------
				1315	\| Returns the result of converting the single-precision floating-point value
				1316	\| `a' to the 32-bit two's complement integer format. The conversion is
				1317	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1318	\| Arithmetic, except that the conversion is always rounded toward zero.
				1319	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				1320	\| the conversion overflows, the largest integer with the same sign as `a' is
				1321	\| returned.
				1322	----------------------------------------------------------------------------/
				1323
				1324	int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
				1325	{
				1326	flag aSign;
				1327	int16 aExp, shiftCount;
				1328	bits32 aSig;
				1329	int32 z;
				1330
				1331	aSig = extractFloat32Frac( a );
				1332	aExp = extractFloat32Exp( a );
				1333	aSign = extractFloat32Sign( a );
				1334	shiftCount = aExp - 0x9E;
				1335	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1336	if ( float32_val(a) != 0xCF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1337	float_raise( float_flag_invalid STATUS_VAR);
				1338	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
				1339	}
				1340	return (sbits32) 0x80000000;
				1341	}
				1342	else if ( aExp <= 0x7E ) {
				1343	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1344	return 0;
				1345	}
				1346	aSig = ( aSig \| 0x00800000 )<<8;
				1347	z = aSig>>( - shiftCount );
				1348	if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
				1349	STATUS(float_exception_flags) \|= float_flag_inexact;
				1350	}
				1351	if ( aSign ) z = - z;
				1352	return z;
				1353
				1354	}
				1355
				1356	/*----------------------------------------------------------------------------
				1357	\| Returns the result of converting the single-precision floating-point value
				1358	\| `a' to the 64-bit two's complement integer format. The conversion is
				1359	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1360	\| Arithmetic---which means in particular that the conversion is rounded
				1361	\| according to the current rounding mode. If `a' is a NaN, the largest
				1362	\| positive integer is returned. Otherwise, if the conversion overflows, the
				1363	\| largest integer with the same sign as `a' is returned.
				1364	----------------------------------------------------------------------------/
				1365
				1366	int64 float32_to_int64( float32 a STATUS_PARAM )
				1367	{
				1368	flag aSign;
				1369	int16 aExp, shiftCount;
				1370	bits32 aSig;
				1371	bits64 aSig64, aSigExtra;
				1372
				1373	aSig = extractFloat32Frac( a );
				1374	aExp = extractFloat32Exp( a );
				1375	aSign = extractFloat32Sign( a );
				1376	shiftCount = 0xBE - aExp;
				1377	if ( shiftCount < 0 ) {
				1378	float_raise( float_flag_invalid STATUS_VAR);
				1379	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1380	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1381	}
				1382	return (sbits64) LIT64( 0x8000000000000000 );
				1383	}
				1384	if ( aExp ) aSig \|= 0x00800000;
				1385	aSig64 = aSig;
				1386	aSig64 <<= 40;
				1387	shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
				1388	return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
				1389
				1390	}
				1391
				1392	/*----------------------------------------------------------------------------
				1393	\| Returns the result of converting the single-precision floating-point value
				1394	\| `a' to the 64-bit two's complement integer format. The conversion is
				1395	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1396	\| Arithmetic, except that the conversion is always rounded toward zero. If
				1397	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				1398	\| conversion overflows, the largest integer with the same sign as `a' is
				1399	\| returned.
				1400	----------------------------------------------------------------------------/
				1401
				1402	int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
				1403	{
				1404	flag aSign;
				1405	int16 aExp, shiftCount;
				1406	bits32 aSig;
				1407	bits64 aSig64;
				1408	int64 z;
				1409
				1410	aSig = extractFloat32Frac( a );
				1411	aExp = extractFloat32Exp( a );
				1412	aSign = extractFloat32Sign( a );
				1413	shiftCount = aExp - 0xBE;
				1414	if ( 0 <= shiftCount ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1415	if ( float32_val(a) != 0xDF000000 ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1416	float_raise( float_flag_invalid STATUS_VAR);
				1417	if ( ! aSign \|\| ( ( aExp == 0xFF ) && aSig ) ) {
				1418	return LIT64( 0x7FFFFFFFFFFFFFFF );
				1419	}
				1420	}
				1421	return (sbits64) LIT64( 0x8000000000000000 );
				1422	}
				1423	else if ( aExp <= 0x7E ) {
				1424	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1425	return 0;
				1426	}
				1427	aSig64 = aSig \| 0x00800000;
				1428	aSig64 <<= 40;
				1429	z = aSig64>>( - shiftCount );
				1430	if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
				1431	STATUS(float_exception_flags) \|= float_flag_inexact;
				1432	}
				1433	if ( aSign ) z = - z;
				1434	return z;
				1435
				1436	}
				1437
				1438	/*----------------------------------------------------------------------------
				1439	\| Returns the result of converting the single-precision floating-point value
				1440	\| `a' to the double-precision floating-point format. The conversion is
				1441	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1442	\| Arithmetic.
				1443	----------------------------------------------------------------------------/
				1444
				1445	float64 float32_to_float64( float32 a STATUS_PARAM )
				1446	{
				1447	flag aSign;
				1448	int16 aExp;
				1449	bits32 aSig;
				1450
				1451	aSig = extractFloat32Frac( a );
				1452	aExp = extractFloat32Exp( a );
				1453	aSign = extractFloat32Sign( a );
				1454	if ( aExp == 0xFF ) {
				1455	if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ));
				1456	return packFloat64( aSign, 0x7FF, 0 );
				1457	}
				1458	if ( aExp == 0 ) {
				1459	if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
				1460	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1461	--aExp;
				1462	}
				1463	return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
				1464
				1465	}
				1466
				1467	#ifdef FLOATX80
				1468
				1469	/*----------------------------------------------------------------------------
				1470	\| Returns the result of converting the single-precision floating-point value
				1471	\| `a' to the extended double-precision floating-point format. The conversion
				1472	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				1473	\| Arithmetic.
				1474	----------------------------------------------------------------------------/
				1475
				1476	floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
				1477	{
				1478	flag aSign;
				1479	int16 aExp;
				1480	bits32 aSig;
				1481
				1482	aSig = extractFloat32Frac( a );
				1483	aExp = extractFloat32Exp( a );
				1484	aSign = extractFloat32Sign( a );
				1485	if ( aExp == 0xFF ) {
				1486	if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) );
				1487	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				1488	}
				1489	if ( aExp == 0 ) {
				1490	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				1491	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1492	}
				1493	aSig \|= 0x00800000;
				1494	return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
				1495
				1496	}
				1497
				1498	#endif
				1499
				1500	#ifdef FLOAT128
				1501
				1502	/*----------------------------------------------------------------------------
				1503	\| Returns the result of converting the single-precision floating-point value
				1504	\| `a' to the double-precision floating-point format. The conversion is
				1505	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				1506	\| Arithmetic.
				1507	----------------------------------------------------------------------------/
				1508
				1509	float128 float32_to_float128( float32 a STATUS_PARAM )
				1510	{
				1511	flag aSign;
				1512	int16 aExp;
				1513	bits32 aSig;
				1514
				1515	aSig = extractFloat32Frac( a );
				1516	aExp = extractFloat32Exp( a );
				1517	aSign = extractFloat32Sign( a );
				1518	if ( aExp == 0xFF ) {
				1519	if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) );
				1520	return packFloat128( aSign, 0x7FFF, 0, 0 );
				1521	}
				1522	if ( aExp == 0 ) {
				1523	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				1524	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1525	--aExp;
				1526	}
				1527	return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
				1528
				1529	}
				1530
				1531	#endif
				1532
				1533	/*----------------------------------------------------------------------------
				1534	\| Rounds the single-precision floating-point value `a' to an integer, and
				1535	\| returns the result as a single-precision floating-point value. The
				1536	\| operation is performed according to the IEC/IEEE Standard for Binary
				1537	\| Floating-Point Arithmetic.
				1538	----------------------------------------------------------------------------/
				1539
				1540	float32 float32_round_to_int( float32 a STATUS_PARAM)
				1541	{
				1542	flag aSign;
				1543	int16 aExp;
				1544	bits32 lastBitMask, roundBitsMask;
				1545	int8 roundingMode;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1546	bits32 z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1547
				1548	aExp = extractFloat32Exp( a );
				1549	if ( 0x96 <= aExp ) {
				1550	if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
				1551	return propagateFloat32NaN( a, a STATUS_VAR );
				1552	}
				1553	return a;
				1554	}
				1555	if ( aExp <= 0x7E ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1556	if ( (bits32) ( float32_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1557	STATUS(float_exception_flags) \|= float_flag_inexact;
				1558	aSign = extractFloat32Sign( a );
				1559	switch ( STATUS(float_rounding_mode) ) {
				1560	case float_round_nearest_even:
				1561	if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
				1562	return packFloat32( aSign, 0x7F, 0 );
				1563	}
				1564	break;
				1565	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1566	return make_float32(aSign ? 0xBF800000 : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1567	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1568	return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1569	}
				1570	return packFloat32( aSign, 0, 0 );
				1571	}
				1572	lastBitMask = 1;
				1573	lastBitMask <<= 0x96 - aExp;
				1574	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1575	z = float32_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1576	roundingMode = STATUS(float_rounding_mode);
				1577	if ( roundingMode == float_round_nearest_even ) {
				1578	z += lastBitMask>>1;
				1579	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				1580	}
				1581	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1582	if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1583	z += roundBitsMask;
				1584	}
				1585	}
				1586	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	1587	if ( z != float32_val(a) ) STATUS(float_exception_flags) \|= float_flag_inexact;
				1588	return make_float32(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1589
				1590	}
				1591
				1592	/*----------------------------------------------------------------------------
				1593	\| Returns the result of adding the absolute values of the single-precision
				1594	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				1595	\| before being returned. `zSign' is ignored if the result is a NaN.
				1596	\| The addition is performed according to the IEC/IEEE Standard for Binary
				1597	\| Floating-Point Arithmetic.
				1598	----------------------------------------------------------------------------/
				1599
				1600	static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1601	{
				1602	int16 aExp, bExp, zExp;
				1603	bits32 aSig, bSig, zSig;
				1604	int16 expDiff;
				1605
				1606	aSig = extractFloat32Frac( a );
				1607	aExp = extractFloat32Exp( a );
				1608	bSig = extractFloat32Frac( b );
				1609	bExp = extractFloat32Exp( b );
				1610	expDiff = aExp - bExp;
				1611	aSig <<= 6;
				1612	bSig <<= 6;
				1613	if ( 0 < expDiff ) {
				1614	if ( aExp == 0xFF ) {
				1615	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1616	return a;
				1617	}
				1618	if ( bExp == 0 ) {
				1619	--expDiff;
				1620	}
				1621	else {
				1622	bSig \|= 0x20000000;
				1623	}
				1624	shift32RightJamming( bSig, expDiff, &bSig );
				1625	zExp = aExp;
				1626	}
				1627	else if ( expDiff < 0 ) {
				1628	if ( bExp == 0xFF ) {
				1629	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1630	return packFloat32( zSign, 0xFF, 0 );
				1631	}
				1632	if ( aExp == 0 ) {
				1633	++expDiff;
				1634	}
				1635	else {
				1636	aSig \|= 0x20000000;
				1637	}
				1638	shift32RightJamming( aSig, - expDiff, &aSig );
				1639	zExp = bExp;
				1640	}
				1641	else {
				1642	if ( aExp == 0xFF ) {
				1643	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1644	return a;
				1645	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	1646	if ( aExp == 0 ) {
				1647	if ( STATUS(flush_to_zero) ) return packFloat32( zSign, 0, 0 );
				1648	return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
				1649	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	1650	zSig = 0x40000000 + aSig + bSig;
				1651	zExp = aExp;
				1652	goto roundAndPack;
				1653	}
				1654	aSig \|= 0x20000000;
				1655	zSig = ( aSig + bSig )<<1;
				1656	--zExp;
				1657	if ( (sbits32) zSig < 0 ) {
				1658	zSig = aSig + bSig;
				1659	++zExp;
				1660	}
				1661	roundAndPack:
				1662	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1663
				1664	}
				1665
				1666	/*----------------------------------------------------------------------------
				1667	\| Returns the result of subtracting the absolute values of the single-
				1668	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				1669	\| difference is negated before being returned. `zSign' is ignored if the
				1670	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				1671	\| Standard for Binary Floating-Point Arithmetic.
				1672	----------------------------------------------------------------------------/
				1673
				1674	static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
				1675	{
				1676	int16 aExp, bExp, zExp;
				1677	bits32 aSig, bSig, zSig;
				1678	int16 expDiff;
				1679
				1680	aSig = extractFloat32Frac( a );
				1681	aExp = extractFloat32Exp( a );
				1682	bSig = extractFloat32Frac( b );
				1683	bExp = extractFloat32Exp( b );
				1684	expDiff = aExp - bExp;
				1685	aSig <<= 7;
				1686	bSig <<= 7;
				1687	if ( 0 < expDiff ) goto aExpBigger;
				1688	if ( expDiff < 0 ) goto bExpBigger;
				1689	if ( aExp == 0xFF ) {
				1690	if ( aSig \| bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1691	float_raise( float_flag_invalid STATUS_VAR);
				1692	return float32_default_nan;
				1693	}
				1694	if ( aExp == 0 ) {
				1695	aExp = 1;
				1696	bExp = 1;
				1697	}
				1698	if ( bSig < aSig ) goto aBigger;
				1699	if ( aSig < bSig ) goto bBigger;
				1700	return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				1701	bExpBigger:
				1702	if ( bExp == 0xFF ) {
				1703	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1704	return packFloat32( zSign ^ 1, 0xFF, 0 );
				1705	}
				1706	if ( aExp == 0 ) {
				1707	++expDiff;
				1708	}
				1709	else {
				1710	aSig \|= 0x40000000;
				1711	}
				1712	shift32RightJamming( aSig, - expDiff, &aSig );
				1713	bSig \|= 0x40000000;
				1714	bBigger:
				1715	zSig = bSig - aSig;
				1716	zExp = bExp;
				1717	zSign ^= 1;
				1718	goto normalizeRoundAndPack;
				1719	aExpBigger:
				1720	if ( aExp == 0xFF ) {
				1721	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1722	return a;
				1723	}
				1724	if ( bExp == 0 ) {
				1725	--expDiff;
				1726	}
				1727	else {
				1728	bSig \|= 0x40000000;
				1729	}
				1730	shift32RightJamming( bSig, expDiff, &bSig );
				1731	aSig \|= 0x40000000;
				1732	aBigger:
				1733	zSig = aSig - bSig;
				1734	zExp = aExp;
				1735	normalizeRoundAndPack:
				1736	--zExp;
				1737	return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1738
				1739	}
				1740
				1741	/*----------------------------------------------------------------------------
				1742	\| Returns the result of adding the single-precision floating-point values `a'
				1743	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				1744	\| Binary Floating-Point Arithmetic.
				1745	----------------------------------------------------------------------------/
				1746
				1747	float32 float32_add( float32 a, float32 b STATUS_PARAM )
				1748	{
				1749	flag aSign, bSign;
				1750
				1751	aSign = extractFloat32Sign( a );
				1752	bSign = extractFloat32Sign( b );
				1753	if ( aSign == bSign ) {
				1754	return addFloat32Sigs( a, b, aSign STATUS_VAR);
				1755	}
				1756	else {
				1757	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1758	}
				1759
				1760	}
				1761
				1762	/*----------------------------------------------------------------------------
				1763	\| Returns the result of subtracting the single-precision floating-point values
				1764	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1765	\| for Binary Floating-Point Arithmetic.
				1766	----------------------------------------------------------------------------/
				1767
				1768	float32 float32_sub( float32 a, float32 b STATUS_PARAM )
				1769	{
				1770	flag aSign, bSign;
				1771
				1772	aSign = extractFloat32Sign( a );
				1773	bSign = extractFloat32Sign( b );
				1774	if ( aSign == bSign ) {
				1775	return subFloat32Sigs( a, b, aSign STATUS_VAR );
				1776	}
				1777	else {
				1778	return addFloat32Sigs( a, b, aSign STATUS_VAR );
				1779	}
				1780
				1781	}
				1782
				1783	/*----------------------------------------------------------------------------
				1784	\| Returns the result of multiplying the single-precision floating-point values
				1785	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				1786	\| for Binary Floating-Point Arithmetic.
				1787	----------------------------------------------------------------------------/
				1788
				1789	float32 float32_mul( float32 a, float32 b STATUS_PARAM )
				1790	{
				1791	flag aSign, bSign, zSign;
				1792	int16 aExp, bExp, zExp;
				1793	bits32 aSig, bSig;
				1794	bits64 zSig64;
				1795	bits32 zSig;
				1796
				1797	aSig = extractFloat32Frac( a );
				1798	aExp = extractFloat32Exp( a );
				1799	aSign = extractFloat32Sign( a );
				1800	bSig = extractFloat32Frac( b );
				1801	bExp = extractFloat32Exp( b );
				1802	bSign = extractFloat32Sign( b );
				1803	zSign = aSign ^ bSign;
				1804	if ( aExp == 0xFF ) {
				1805	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1806	return propagateFloat32NaN( a, b STATUS_VAR );
				1807	}
				1808	if ( ( bExp \| bSig ) == 0 ) {
				1809	float_raise( float_flag_invalid STATUS_VAR);
				1810	return float32_default_nan;
				1811	}
				1812	return packFloat32( zSign, 0xFF, 0 );
				1813	}
				1814	if ( bExp == 0xFF ) {
				1815	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1816	if ( ( aExp \| aSig ) == 0 ) {
				1817	float_raise( float_flag_invalid STATUS_VAR);
				1818	return float32_default_nan;
				1819	}
				1820	return packFloat32( zSign, 0xFF, 0 );
				1821	}
				1822	if ( aExp == 0 ) {
				1823	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1824	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1825	}
				1826	if ( bExp == 0 ) {
				1827	if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
				1828	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1829	}
				1830	zExp = aExp + bExp - 0x7F;
				1831	aSig = ( aSig \| 0x00800000 )<<7;
				1832	bSig = ( bSig \| 0x00800000 )<<8;
				1833	shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
				1834	zSig = zSig64;
				1835	if ( 0 <= (sbits32) ( zSig<<1 ) ) {
				1836	zSig <<= 1;
				1837	--zExp;
				1838	}
				1839	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1840
				1841	}
				1842
				1843	/*----------------------------------------------------------------------------
				1844	\| Returns the result of dividing the single-precision floating-point value `a'
				1845	\| by the corresponding value `b'. The operation is performed according to the
				1846	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1847	----------------------------------------------------------------------------/
				1848
				1849	float32 float32_div( float32 a, float32 b STATUS_PARAM )
				1850	{
				1851	flag aSign, bSign, zSign;
				1852	int16 aExp, bExp, zExp;
				1853	bits32 aSig, bSig, zSig;
				1854
				1855	aSig = extractFloat32Frac( a );
				1856	aExp = extractFloat32Exp( a );
				1857	aSign = extractFloat32Sign( a );
				1858	bSig = extractFloat32Frac( b );
				1859	bExp = extractFloat32Exp( b );
				1860	bSign = extractFloat32Sign( b );
				1861	zSign = aSign ^ bSign;
				1862	if ( aExp == 0xFF ) {
				1863	if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1864	if ( bExp == 0xFF ) {
				1865	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1866	float_raise( float_flag_invalid STATUS_VAR);
				1867	return float32_default_nan;
				1868	}
				1869	return packFloat32( zSign, 0xFF, 0 );
				1870	}
				1871	if ( bExp == 0xFF ) {
				1872	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1873	return packFloat32( zSign, 0, 0 );
				1874	}
				1875	if ( bExp == 0 ) {
				1876	if ( bSig == 0 ) {
				1877	if ( ( aExp \| aSig ) == 0 ) {
				1878	float_raise( float_flag_invalid STATUS_VAR);
				1879	return float32_default_nan;
				1880	}
				1881	float_raise( float_flag_divbyzero STATUS_VAR);
				1882	return packFloat32( zSign, 0xFF, 0 );
				1883	}
				1884	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1885	}
				1886	if ( aExp == 0 ) {
				1887	if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
				1888	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1889	}
				1890	zExp = aExp - bExp + 0x7D;
				1891	aSig = ( aSig \| 0x00800000 )<<7;
				1892	bSig = ( bSig \| 0x00800000 )<<8;
				1893	if ( bSig <= ( aSig + aSig ) ) {
				1894	aSig >>= 1;
				1895	++zExp;
				1896	}
				1897	zSig = ( ( (bits64) aSig )<<32 ) / bSig;
				1898	if ( ( zSig & 0x3F ) == 0 ) {
				1899	zSig \|= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
				1900	}
				1901	return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
				1902
				1903	}
				1904
				1905	/*----------------------------------------------------------------------------
				1906	\| Returns the remainder of the single-precision floating-point value `a'
				1907	\| with respect to the corresponding value `b'. The operation is performed
				1908	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				1909	----------------------------------------------------------------------------/
				1910
				1911	float32 float32_rem( float32 a, float32 b STATUS_PARAM )
				1912	{
				1913	flag aSign, bSign, zSign;
				1914	int16 aExp, bExp, expDiff;
				1915	bits32 aSig, bSig;
				1916	bits32 q;
				1917	bits64 aSig64, bSig64, q64;
				1918	bits32 alternateASig;
				1919	sbits32 sigMean;
				1920
				1921	aSig = extractFloat32Frac( a );
				1922	aExp = extractFloat32Exp( a );
				1923	aSign = extractFloat32Sign( a );
				1924	bSig = extractFloat32Frac( b );
				1925	bExp = extractFloat32Exp( b );
				1926	bSign = extractFloat32Sign( b );
				1927	if ( aExp == 0xFF ) {
				1928	if ( aSig \|\| ( ( bExp == 0xFF ) && bSig ) ) {
				1929	return propagateFloat32NaN( a, b STATUS_VAR );
				1930	}
				1931	float_raise( float_flag_invalid STATUS_VAR);
				1932	return float32_default_nan;
				1933	}
				1934	if ( bExp == 0xFF ) {
				1935	if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
				1936	return a;
				1937	}
				1938	if ( bExp == 0 ) {
				1939	if ( bSig == 0 ) {
				1940	float_raise( float_flag_invalid STATUS_VAR);
				1941	return float32_default_nan;
				1942	}
				1943	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
				1944	}
				1945	if ( aExp == 0 ) {
				1946	if ( aSig == 0 ) return a;
				1947	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				1948	}
				1949	expDiff = aExp - bExp;
				1950	aSig \|= 0x00800000;
				1951	bSig \|= 0x00800000;
				1952	if ( expDiff < 32 ) {
				1953	aSig <<= 8;
				1954	bSig <<= 8;
				1955	if ( expDiff < 0 ) {
				1956	if ( expDiff < -1 ) return a;
				1957	aSig >>= 1;
				1958	}
				1959	q = ( bSig <= aSig );
				1960	if ( q ) aSig -= bSig;
				1961	if ( 0 < expDiff ) {
				1962	q = ( ( (bits64) aSig )<<32 ) / bSig;
				1963	q >>= 32 - expDiff;
				1964	bSig >>= 2;
				1965	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				1966	}
				1967	else {
				1968	aSig >>= 2;
				1969	bSig >>= 2;
				1970	}
				1971	}
				1972	else {
				1973	if ( bSig <= aSig ) aSig -= bSig;
				1974	aSig64 = ( (bits64) aSig )<<40;
				1975	bSig64 = ( (bits64) bSig )<<40;
				1976	expDiff -= 64;
				1977	while ( 0 < expDiff ) {
				1978	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1979	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1980	aSig64 = - ( ( bSig * q64 )<<38 );
				1981	expDiff -= 62;
				1982	}
				1983	expDiff += 64;
				1984	q64 = estimateDiv128To64( aSig64, 0, bSig64 );
				1985	q64 = ( 2 < q64 ) ? q64 - 2 : 0;
				1986	q = q64>>( 64 - expDiff );
				1987	bSig <<= 6;
				1988	aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
				1989	}
				1990	do {
				1991	alternateASig = aSig;
				1992	++q;
				1993	aSig -= bSig;
				1994	} while ( 0 <= (sbits32) aSig );
				1995	sigMean = aSig + alternateASig;
				1996	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				1997	aSig = alternateASig;
				1998	}
				1999	zSign = ( (sbits32) aSig < 0 );
				2000	if ( zSign ) aSig = - aSig;
				2001	return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
				2002
				2003	}
				2004
				2005	/*----------------------------------------------------------------------------
				2006	\| Returns the square root of the single-precision floating-point value `a'.
				2007	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2008	\| Floating-Point Arithmetic.
				2009	----------------------------------------------------------------------------/
				2010
				2011	float32 float32_sqrt( float32 a STATUS_PARAM )
				2012	{
				2013	flag aSign;
				2014	int16 aExp, zExp;
				2015	bits32 aSig, zSig;
				2016	bits64 rem, term;
				2017
				2018	aSig = extractFloat32Frac( a );
				2019	aExp = extractFloat32Exp( a );
				2020	aSign = extractFloat32Sign( a );
				2021	if ( aExp == 0xFF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2022	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2023	if ( ! aSign ) return a;
				2024	float_raise( float_flag_invalid STATUS_VAR);
				2025	return float32_default_nan;
				2026	}
				2027	if ( aSign ) {
				2028	if ( ( aExp \| aSig ) == 0 ) return a;
				2029	float_raise( float_flag_invalid STATUS_VAR);
				2030	return float32_default_nan;
				2031	}
				2032	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2033	if ( aSig == 0 ) return float32_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2034	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2035	}
				2036	zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
				2037	aSig = ( aSig \| 0x00800000 )<<8;
				2038	zSig = estimateSqrt32( aExp, aSig ) + 2;
				2039	if ( ( zSig & 0x7F ) <= 5 ) {
				2040	if ( zSig < 2 ) {
				2041	zSig = 0x7FFFFFFF;
				2042	goto roundAndPack;
				2043	}
				2044	aSig >>= aExp & 1;
				2045	term = ( (bits64) zSig ) * zSig;
				2046	rem = ( ( (bits64) aSig )<<32 ) - term;
				2047	while ( (sbits64) rem < 0 ) {
				2048	--zSig;
				2049	rem += ( ( (bits64) zSig )<<1 ) \| 1;
				2050	}
				2051	zSig \|= ( rem != 0 );
				2052	}
				2053	shift32RightJamming( zSig, 1, &zSig );
				2054	roundAndPack:
				2055	return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
				2056
				2057	}
				2058
				2059	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	2060	\| Returns the binary log of the single-precision floating-point value `a'.
				2061	\| The operation is performed according to the IEC/IEEE Standard for Binary
				2062	\| Floating-Point Arithmetic.
				2063	----------------------------------------------------------------------------/
				2064	float32 float32_log2( float32 a STATUS_PARAM )
				2065	{
				2066	flag aSign, zSign;
				2067	int16 aExp;
				2068	bits32 aSig, zSig, i;
				2069
				2070	aSig = extractFloat32Frac( a );
				2071	aExp = extractFloat32Exp( a );
				2072	aSign = extractFloat32Sign( a );
				2073
				2074	if ( aExp == 0 ) {
				2075	if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
				2076	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
				2077	}
				2078	if ( aSign ) {
				2079	float_raise( float_flag_invalid STATUS_VAR);
				2080	return float32_default_nan;
				2081	}
				2082	if ( aExp == 0xFF ) {
				2083	if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
				2084	return a;
				2085	}
				2086
				2087	aExp -= 0x7F;
				2088	aSig \|= 0x00800000;
				2089	zSign = aExp < 0;
				2090	zSig = aExp << 23;
				2091
				2092	for (i = 1 << 22; i > 0; i >>= 1) {
				2093	aSig = ( (bits64)aSig * aSig ) >> 23;
				2094	if ( aSig & 0x01000000 ) {
				2095	aSig >>= 1;
				2096	zSig \|= i;
				2097	}
				2098	}
				2099
				2100	if ( zSign )
				2101	zSig = -zSig;
				2102
				2103	return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
				2104	}
				2105
				2106	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2107	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2108	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2109	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2110	----------------------------------------------------------------------------/
				2111
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2112	int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2113	{
				2114
				2115	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2116	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2117	) {
				2118	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2119	float_raise( float_flag_invalid STATUS_VAR);
				2120	}
				2121	return 0;
				2122	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2123	return ( float32_val(a) == float32_val(b) ) \|\|
				2124	( (bits32) ( ( float32_val(a) \| float32_val(b) )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2125
				2126	}
				2127
				2128	/*----------------------------------------------------------------------------
				2129	\| Returns 1 if the single-precision floating-point value `a' is less than
				2130	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				2131	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2132	\| Arithmetic.
				2133	----------------------------------------------------------------------------/
				2134
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2135	int float32_le( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2136	{
				2137	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2138	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2139
				2140	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2141	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2142	) {
				2143	float_raise( float_flag_invalid STATUS_VAR);
				2144	return 0;
				2145	}
				2146	aSign = extractFloat32Sign( a );
				2147	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2148	av = float32_val(a);
				2149	bv = float32_val(b);
				2150	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
				2151	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2152
				2153	}
				2154
				2155	/*----------------------------------------------------------------------------
				2156	\| Returns 1 if the single-precision floating-point value `a' is less than
				2157	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				2158	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2159	----------------------------------------------------------------------------/
				2160
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2161	int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2162	{
				2163	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2164	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2165
				2166	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2167	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2168	) {
				2169	float_raise( float_flag_invalid STATUS_VAR);
				2170	return 0;
				2171	}
				2172	aSign = extractFloat32Sign( a );
				2173	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2174	av = float32_val(a);
				2175	bv = float32_val(b);
				2176	if ( aSign != bSign ) return aSign && ( (bits32) ( ( av \| bv )<<1 ) != 0 );
				2177	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2178
				2179	}
				2180
				2181	/*----------------------------------------------------------------------------
				2182	\| Returns 1 if the single-precision floating-point value `a' is equal to
				2183	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				2184	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				2185	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2186	----------------------------------------------------------------------------/
				2187
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2188	int float32_eq_signaling( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2189	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2190	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2191
				2192	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2193	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2194	) {
				2195	float_raise( float_flag_invalid STATUS_VAR);
				2196	return 0;
				2197	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2198	av = float32_val(a);
				2199	bv = float32_val(b);
				2200	return ( av == bv ) \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2201
				2202	}
				2203
				2204	/*----------------------------------------------------------------------------
				2205	\| Returns 1 if the single-precision floating-point value `a' is less than or
				2206	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				2207	\| cause an exception. Otherwise, the comparison is performed according to the
				2208	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2209	----------------------------------------------------------------------------/
				2210
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2211	int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2212	{
				2213	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2214	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2215
				2216	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2217	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2218	) {
				2219	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2220	float_raise( float_flag_invalid STATUS_VAR);
				2221	}
				2222	return 0;
				2223	}
				2224	aSign = extractFloat32Sign( a );
				2225	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2226	av = float32_val(a);
				2227	bv = float32_val(b);
				2228	if ( aSign != bSign ) return aSign \|\| ( (bits32) ( ( av \| bv )<<1 ) == 0 );
				2229	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2230
				2231	}
				2232
				2233	/*----------------------------------------------------------------------------
				2234	\| Returns 1 if the single-precision floating-point value `a' is less than
				2235	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				2236	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				2237	\| Standard for Binary Floating-Point Arithmetic.
				2238	----------------------------------------------------------------------------/
				2239
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	2240	int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2241	{
				2242	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2243	bits32 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2244
				2245	if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
				2246	\|\| ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
				2247	) {
				2248	if ( float32_is_signaling_nan( a ) \|\| float32_is_signaling_nan( b ) ) {
				2249	float_raise( float_flag_invalid STATUS_VAR);
				2250	}
				2251	return 0;
				2252	}
				2253	aSign = extractFloat32Sign( a );
				2254	bSign = extractFloat32Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2255	av = float32_val(a);
				2256	bv = float32_val(b);
				2257	if ( aSign != bSign ) return aSign && ( (bits32) ( ( av \| bv )<<1 ) != 0 );
				2258	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2259
				2260	}
				2261
				2262	/*----------------------------------------------------------------------------
				2263	\| Returns the result of converting the double-precision floating-point value
				2264	\| `a' to the 32-bit two's complement integer format. The conversion is
				2265	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2266	\| Arithmetic---which means in particular that the conversion is rounded
				2267	\| according to the current rounding mode. If `a' is a NaN, the largest
				2268	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2269	\| largest integer with the same sign as `a' is returned.
				2270	----------------------------------------------------------------------------/
				2271
				2272	int32 float64_to_int32( float64 a STATUS_PARAM )
				2273	{
				2274	flag aSign;
				2275	int16 aExp, shiftCount;
				2276	bits64 aSig;
				2277
				2278	aSig = extractFloat64Frac( a );
				2279	aExp = extractFloat64Exp( a );
				2280	aSign = extractFloat64Sign( a );
				2281	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2282	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2283	shiftCount = 0x42C - aExp;
				2284	if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
				2285	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				2286
				2287	}
				2288
				2289	/*----------------------------------------------------------------------------
				2290	\| Returns the result of converting the double-precision floating-point value
				2291	\| `a' to the 32-bit two's complement integer format. The conversion is
				2292	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2293	\| Arithmetic, except that the conversion is always rounded toward zero.
				2294	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2295	\| the conversion overflows, the largest integer with the same sign as `a' is
				2296	\| returned.
				2297	----------------------------------------------------------------------------/
				2298
				2299	int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
				2300	{
				2301	flag aSign;
				2302	int16 aExp, shiftCount;
				2303	bits64 aSig, savedASig;
				2304	int32 z;
				2305
				2306	aSig = extractFloat64Frac( a );
				2307	aExp = extractFloat64Exp( a );
				2308	aSign = extractFloat64Sign( a );
				2309	if ( 0x41E < aExp ) {
				2310	if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
				2311	goto invalid;
				2312	}
				2313	else if ( aExp < 0x3FF ) {
				2314	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2315	return 0;
				2316	}
				2317	aSig \|= LIT64( 0x0010000000000000 );
				2318	shiftCount = 0x433 - aExp;
				2319	savedASig = aSig;
				2320	aSig >>= shiftCount;
				2321	z = aSig;
				2322	if ( aSign ) z = - z;
				2323	if ( ( z < 0 ) ^ aSign ) {
				2324	invalid:
				2325	float_raise( float_flag_invalid STATUS_VAR);
				2326	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				2327	}
				2328	if ( ( aSig<<shiftCount ) != savedASig ) {
				2329	STATUS(float_exception_flags) \|= float_flag_inexact;
				2330	}
				2331	return z;
				2332
				2333	}
				2334
				2335	/*----------------------------------------------------------------------------
				2336	\| Returns the result of converting the double-precision floating-point value
				2337	\| `a' to the 64-bit two's complement integer format. The conversion is
				2338	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2339	\| Arithmetic---which means in particular that the conversion is rounded
				2340	\| according to the current rounding mode. If `a' is a NaN, the largest
				2341	\| positive integer is returned. Otherwise, if the conversion overflows, the
				2342	\| largest integer with the same sign as `a' is returned.
				2343	----------------------------------------------------------------------------/
				2344
				2345	int64 float64_to_int64( float64 a STATUS_PARAM )
				2346	{
				2347	flag aSign;
				2348	int16 aExp, shiftCount;
				2349	bits64 aSig, aSigExtra;
				2350
				2351	aSig = extractFloat64Frac( a );
				2352	aExp = extractFloat64Exp( a );
				2353	aSign = extractFloat64Sign( a );
				2354	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2355	shiftCount = 0x433 - aExp;
				2356	if ( shiftCount <= 0 ) {
				2357	if ( 0x43E < aExp ) {
				2358	float_raise( float_flag_invalid STATUS_VAR);
				2359	if ( ! aSign
				2360	\|\| ( ( aExp == 0x7FF )
				2361	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2362	) {
				2363	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2364	}
				2365	return (sbits64) LIT64( 0x8000000000000000 );
				2366	}
				2367	aSigExtra = 0;
				2368	aSig <<= - shiftCount;
				2369	}
				2370	else {
				2371	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				2372	}
				2373	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				2374
				2375	}
				2376
				2377	/*----------------------------------------------------------------------------
				2378	\| Returns the result of converting the double-precision floating-point value
				2379	\| `a' to the 64-bit two's complement integer format. The conversion is
				2380	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2381	\| Arithmetic, except that the conversion is always rounded toward zero.
				2382	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				2383	\| the conversion overflows, the largest integer with the same sign as `a' is
				2384	\| returned.
				2385	----------------------------------------------------------------------------/
				2386
				2387	int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
				2388	{
				2389	flag aSign;
				2390	int16 aExp, shiftCount;
				2391	bits64 aSig;
				2392	int64 z;
				2393
				2394	aSig = extractFloat64Frac( a );
				2395	aExp = extractFloat64Exp( a );
				2396	aSign = extractFloat64Sign( a );
				2397	if ( aExp ) aSig \|= LIT64( 0x0010000000000000 );
				2398	shiftCount = aExp - 0x433;
				2399	if ( 0 <= shiftCount ) {
				2400	if ( 0x43E <= aExp ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2401	if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2402	float_raise( float_flag_invalid STATUS_VAR);
				2403	if ( ! aSign
				2404	\|\| ( ( aExp == 0x7FF )
				2405	&& ( aSig != LIT64( 0x0010000000000000 ) ) )
				2406	) {
				2407	return LIT64( 0x7FFFFFFFFFFFFFFF );
				2408	}
				2409	}
				2410	return (sbits64) LIT64( 0x8000000000000000 );
				2411	}
				2412	z = aSig<<shiftCount;
				2413	}
				2414	else {
				2415	if ( aExp < 0x3FE ) {
				2416	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				2417	return 0;
				2418	}
				2419	z = aSig>>( - shiftCount );
				2420	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				2421	STATUS(float_exception_flags) \|= float_flag_inexact;
				2422	}
				2423	}
				2424	if ( aSign ) z = - z;
				2425	return z;
				2426
				2427	}
				2428
				2429	/*----------------------------------------------------------------------------
				2430	\| Returns the result of converting the double-precision floating-point value
				2431	\| `a' to the single-precision floating-point format. The conversion is
				2432	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2433	\| Arithmetic.
				2434	----------------------------------------------------------------------------/
				2435
				2436	float32 float64_to_float32( float64 a STATUS_PARAM )
				2437	{
				2438	flag aSign;
				2439	int16 aExp;
				2440	bits64 aSig;
				2441	bits32 zSig;
				2442
				2443	aSig = extractFloat64Frac( a );
				2444	aExp = extractFloat64Exp( a );
				2445	aSign = extractFloat64Sign( a );
				2446	if ( aExp == 0x7FF ) {
				2447	if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) );
				2448	return packFloat32( aSign, 0xFF, 0 );
				2449	}
				2450	shift64RightJamming( aSig, 22, &aSig );
				2451	zSig = aSig;
				2452	if ( aExp \|\| zSig ) {
				2453	zSig \|= 0x40000000;
				2454	aExp -= 0x381;
				2455	}
				2456	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				2457
				2458	}
				2459
				2460	#ifdef FLOATX80
				2461
				2462	/*----------------------------------------------------------------------------
				2463	\| Returns the result of converting the double-precision floating-point value
				2464	\| `a' to the extended double-precision floating-point format. The conversion
				2465	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				2466	\| Arithmetic.
				2467	----------------------------------------------------------------------------/
				2468
				2469	floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
				2470	{
				2471	flag aSign;
				2472	int16 aExp;
				2473	bits64 aSig;
				2474
				2475	aSig = extractFloat64Frac( a );
				2476	aExp = extractFloat64Exp( a );
				2477	aSign = extractFloat64Sign( a );
				2478	if ( aExp == 0x7FF ) {
				2479	if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) );
				2480	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				2481	}
				2482	if ( aExp == 0 ) {
				2483	if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
				2484	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2485	}
				2486	return
				2487	packFloatx80(
				2488	aSign, aExp + 0x3C00, ( aSig \| LIT64( 0x0010000000000000 ) )<<11 );
				2489
				2490	}
				2491
				2492	#endif
				2493
				2494	#ifdef FLOAT128
				2495
				2496	/*----------------------------------------------------------------------------
				2497	\| Returns the result of converting the double-precision floating-point value
				2498	\| `a' to the quadruple-precision floating-point format. The conversion is
				2499	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				2500	\| Arithmetic.
				2501	----------------------------------------------------------------------------/
				2502
				2503	float128 float64_to_float128( float64 a STATUS_PARAM )
				2504	{
				2505	flag aSign;
				2506	int16 aExp;
				2507	bits64 aSig, zSig0, zSig1;
				2508
				2509	aSig = extractFloat64Frac( a );
				2510	aExp = extractFloat64Exp( a );
				2511	aSign = extractFloat64Sign( a );
				2512	if ( aExp == 0x7FF ) {
				2513	if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) );
				2514	return packFloat128( aSign, 0x7FFF, 0, 0 );
				2515	}
				2516	if ( aExp == 0 ) {
				2517	if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
				2518	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2519	--aExp;
				2520	}
				2521	shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
				2522	return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
				2523
				2524	}
				2525
				2526	#endif
				2527
				2528	/*----------------------------------------------------------------------------
				2529	\| Rounds the double-precision floating-point value `a' to an integer, and
				2530	\| returns the result as a double-precision floating-point value. The
				2531	\| operation is performed according to the IEC/IEEE Standard for Binary
				2532	\| Floating-Point Arithmetic.
				2533	----------------------------------------------------------------------------/
				2534
				2535	float64 float64_round_to_int( float64 a STATUS_PARAM )
				2536	{
				2537	flag aSign;
				2538	int16 aExp;
				2539	bits64 lastBitMask, roundBitsMask;
				2540	int8 roundingMode;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2541	bits64 z;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2542
				2543	aExp = extractFloat64Exp( a );
				2544	if ( 0x433 <= aExp ) {
				2545	if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
				2546	return propagateFloat64NaN( a, a STATUS_VAR );
				2547	}
				2548	return a;
				2549	}
				2550	if ( aExp < 0x3FF ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2551	if ( (bits64) ( float64_val(a)<<1 ) == 0 ) return a;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2552	STATUS(float_exception_flags) \|= float_flag_inexact;
				2553	aSign = extractFloat64Sign( a );
				2554	switch ( STATUS(float_rounding_mode) ) {
				2555	case float_round_nearest_even:
				2556	if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
				2557	return packFloat64( aSign, 0x3FF, 0 );
				2558	}
				2559	break;
				2560	case float_round_down:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2561	return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2562	case float_round_up:
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2563	return make_float64(
				2564	aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2565	}
				2566	return packFloat64( aSign, 0, 0 );
				2567	}
				2568	lastBitMask = 1;
				2569	lastBitMask <<= 0x433 - aExp;
				2570	roundBitsMask = lastBitMask - 1;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2571	z = float64_val(a);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2572	roundingMode = STATUS(float_rounding_mode);
				2573	if ( roundingMode == float_round_nearest_even ) {
				2574	z += lastBitMask>>1;
				2575	if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
				2576	}
				2577	else if ( roundingMode != float_round_to_zero ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2578	if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2579	z += roundBitsMask;
				2580	}
				2581	}
				2582	z &= ~ roundBitsMask;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	2583	if ( z != float64_val(a) )
				2584	STATUS(float_exception_flags) \|= float_flag_inexact;
				2585	return make_float64(z);
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2586
				2587	}
				2588
pbrook	e6e5906	2006-10-22 00:18:54 +0000	[diff] [blame]	2589	float64 float64_trunc_to_int( float64 a STATUS_PARAM)
				2590	{
				2591	int oldmode;
				2592	float64 res;
				2593	oldmode = STATUS(float_rounding_mode);
				2594	STATUS(float_rounding_mode) = float_round_to_zero;
				2595	res = float64_round_to_int(a STATUS_VAR);
				2596	STATUS(float_rounding_mode) = oldmode;
				2597	return res;
				2598	}
				2599
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2600	/*----------------------------------------------------------------------------
				2601	\| Returns the result of adding the absolute values of the double-precision
				2602	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				2603	\| before being returned. `zSign' is ignored if the result is a NaN.
				2604	\| The addition is performed according to the IEC/IEEE Standard for Binary
				2605	\| Floating-Point Arithmetic.
				2606	----------------------------------------------------------------------------/
				2607
				2608	static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2609	{
				2610	int16 aExp, bExp, zExp;
				2611	bits64 aSig, bSig, zSig;
				2612	int16 expDiff;
				2613
				2614	aSig = extractFloat64Frac( a );
				2615	aExp = extractFloat64Exp( a );
				2616	bSig = extractFloat64Frac( b );
				2617	bExp = extractFloat64Exp( b );
				2618	expDiff = aExp - bExp;
				2619	aSig <<= 9;
				2620	bSig <<= 9;
				2621	if ( 0 < expDiff ) {
				2622	if ( aExp == 0x7FF ) {
				2623	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2624	return a;
				2625	}
				2626	if ( bExp == 0 ) {
				2627	--expDiff;
				2628	}
				2629	else {
				2630	bSig \|= LIT64( 0x2000000000000000 );
				2631	}
				2632	shift64RightJamming( bSig, expDiff, &bSig );
				2633	zExp = aExp;
				2634	}
				2635	else if ( expDiff < 0 ) {
				2636	if ( bExp == 0x7FF ) {
				2637	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2638	return packFloat64( zSign, 0x7FF, 0 );
				2639	}
				2640	if ( aExp == 0 ) {
				2641	++expDiff;
				2642	}
				2643	else {
				2644	aSig \|= LIT64( 0x2000000000000000 );
				2645	}
				2646	shift64RightJamming( aSig, - expDiff, &aSig );
				2647	zExp = bExp;
				2648	}
				2649	else {
				2650	if ( aExp == 0x7FF ) {
				2651	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2652	return a;
				2653	}
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	2654	if ( aExp == 0 ) {
				2655	if ( STATUS(flush_to_zero) ) return packFloat64( zSign, 0, 0 );
				2656	return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
				2657	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	2658	zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
				2659	zExp = aExp;
				2660	goto roundAndPack;
				2661	}
				2662	aSig \|= LIT64( 0x2000000000000000 );
				2663	zSig = ( aSig + bSig )<<1;
				2664	--zExp;
				2665	if ( (sbits64) zSig < 0 ) {
				2666	zSig = aSig + bSig;
				2667	++zExp;
				2668	}
				2669	roundAndPack:
				2670	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2671
				2672	}
				2673
				2674	/*----------------------------------------------------------------------------
				2675	\| Returns the result of subtracting the absolute values of the double-
				2676	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				2677	\| difference is negated before being returned. `zSign' is ignored if the
				2678	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				2679	\| Standard for Binary Floating-Point Arithmetic.
				2680	----------------------------------------------------------------------------/
				2681
				2682	static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
				2683	{
				2684	int16 aExp, bExp, zExp;
				2685	bits64 aSig, bSig, zSig;
				2686	int16 expDiff;
				2687
				2688	aSig = extractFloat64Frac( a );
				2689	aExp = extractFloat64Exp( a );
				2690	bSig = extractFloat64Frac( b );
				2691	bExp = extractFloat64Exp( b );
				2692	expDiff = aExp - bExp;
				2693	aSig <<= 10;
				2694	bSig <<= 10;
				2695	if ( 0 < expDiff ) goto aExpBigger;
				2696	if ( expDiff < 0 ) goto bExpBigger;
				2697	if ( aExp == 0x7FF ) {
				2698	if ( aSig \| bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2699	float_raise( float_flag_invalid STATUS_VAR);
				2700	return float64_default_nan;
				2701	}
				2702	if ( aExp == 0 ) {
				2703	aExp = 1;
				2704	bExp = 1;
				2705	}
				2706	if ( bSig < aSig ) goto aBigger;
				2707	if ( aSig < bSig ) goto bBigger;
				2708	return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				2709	bExpBigger:
				2710	if ( bExp == 0x7FF ) {
				2711	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2712	return packFloat64( zSign ^ 1, 0x7FF, 0 );
				2713	}
				2714	if ( aExp == 0 ) {
				2715	++expDiff;
				2716	}
				2717	else {
				2718	aSig \|= LIT64( 0x4000000000000000 );
				2719	}
				2720	shift64RightJamming( aSig, - expDiff, &aSig );
				2721	bSig \|= LIT64( 0x4000000000000000 );
				2722	bBigger:
				2723	zSig = bSig - aSig;
				2724	zExp = bExp;
				2725	zSign ^= 1;
				2726	goto normalizeRoundAndPack;
				2727	aExpBigger:
				2728	if ( aExp == 0x7FF ) {
				2729	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2730	return a;
				2731	}
				2732	if ( bExp == 0 ) {
				2733	--expDiff;
				2734	}
				2735	else {
				2736	bSig \|= LIT64( 0x4000000000000000 );
				2737	}
				2738	shift64RightJamming( bSig, expDiff, &bSig );
				2739	aSig \|= LIT64( 0x4000000000000000 );
				2740	aBigger:
				2741	zSig = aSig - bSig;
				2742	zExp = aExp;
				2743	normalizeRoundAndPack:
				2744	--zExp;
				2745	return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2746
				2747	}
				2748
				2749	/*----------------------------------------------------------------------------
				2750	\| Returns the result of adding the double-precision floating-point values `a'
				2751	\| and `b'. The operation is performed according to the IEC/IEEE Standard for
				2752	\| Binary Floating-Point Arithmetic.
				2753	----------------------------------------------------------------------------/
				2754
				2755	float64 float64_add( float64 a, float64 b STATUS_PARAM )
				2756	{
				2757	flag aSign, bSign;
				2758
				2759	aSign = extractFloat64Sign( a );
				2760	bSign = extractFloat64Sign( b );
				2761	if ( aSign == bSign ) {
				2762	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2763	}
				2764	else {
				2765	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2766	}
				2767
				2768	}
				2769
				2770	/*----------------------------------------------------------------------------
				2771	\| Returns the result of subtracting the double-precision floating-point values
				2772	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2773	\| for Binary Floating-Point Arithmetic.
				2774	----------------------------------------------------------------------------/
				2775
				2776	float64 float64_sub( float64 a, float64 b STATUS_PARAM )
				2777	{
				2778	flag aSign, bSign;
				2779
				2780	aSign = extractFloat64Sign( a );
				2781	bSign = extractFloat64Sign( b );
				2782	if ( aSign == bSign ) {
				2783	return subFloat64Sigs( a, b, aSign STATUS_VAR );
				2784	}
				2785	else {
				2786	return addFloat64Sigs( a, b, aSign STATUS_VAR );
				2787	}
				2788
				2789	}
				2790
				2791	/*----------------------------------------------------------------------------
				2792	\| Returns the result of multiplying the double-precision floating-point values
				2793	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				2794	\| for Binary Floating-Point Arithmetic.
				2795	----------------------------------------------------------------------------/
				2796
				2797	float64 float64_mul( float64 a, float64 b STATUS_PARAM )
				2798	{
				2799	flag aSign, bSign, zSign;
				2800	int16 aExp, bExp, zExp;
				2801	bits64 aSig, bSig, zSig0, zSig1;
				2802
				2803	aSig = extractFloat64Frac( a );
				2804	aExp = extractFloat64Exp( a );
				2805	aSign = extractFloat64Sign( a );
				2806	bSig = extractFloat64Frac( b );
				2807	bExp = extractFloat64Exp( b );
				2808	bSign = extractFloat64Sign( b );
				2809	zSign = aSign ^ bSign;
				2810	if ( aExp == 0x7FF ) {
				2811	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2812	return propagateFloat64NaN( a, b STATUS_VAR );
				2813	}
				2814	if ( ( bExp \| bSig ) == 0 ) {
				2815	float_raise( float_flag_invalid STATUS_VAR);
				2816	return float64_default_nan;
				2817	}
				2818	return packFloat64( zSign, 0x7FF, 0 );
				2819	}
				2820	if ( bExp == 0x7FF ) {
				2821	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2822	if ( ( aExp \| aSig ) == 0 ) {
				2823	float_raise( float_flag_invalid STATUS_VAR);
				2824	return float64_default_nan;
				2825	}
				2826	return packFloat64( zSign, 0x7FF, 0 );
				2827	}
				2828	if ( aExp == 0 ) {
				2829	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2830	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2831	}
				2832	if ( bExp == 0 ) {
				2833	if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
				2834	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2835	}
				2836	zExp = aExp + bExp - 0x3FF;
				2837	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2838	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2839	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				2840	zSig0 \|= ( zSig1 != 0 );
				2841	if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
				2842	zSig0 <<= 1;
				2843	--zExp;
				2844	}
				2845	return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
				2846
				2847	}
				2848
				2849	/*----------------------------------------------------------------------------
				2850	\| Returns the result of dividing the double-precision floating-point value `a'
				2851	\| by the corresponding value `b'. The operation is performed according to
				2852	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2853	----------------------------------------------------------------------------/
				2854
				2855	float64 float64_div( float64 a, float64 b STATUS_PARAM )
				2856	{
				2857	flag aSign, bSign, zSign;
				2858	int16 aExp, bExp, zExp;
				2859	bits64 aSig, bSig, zSig;
				2860	bits64 rem0, rem1;
				2861	bits64 term0, term1;
				2862
				2863	aSig = extractFloat64Frac( a );
				2864	aExp = extractFloat64Exp( a );
				2865	aSign = extractFloat64Sign( a );
				2866	bSig = extractFloat64Frac( b );
				2867	bExp = extractFloat64Exp( b );
				2868	bSign = extractFloat64Sign( b );
				2869	zSign = aSign ^ bSign;
				2870	if ( aExp == 0x7FF ) {
				2871	if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2872	if ( bExp == 0x7FF ) {
				2873	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2874	float_raise( float_flag_invalid STATUS_VAR);
				2875	return float64_default_nan;
				2876	}
				2877	return packFloat64( zSign, 0x7FF, 0 );
				2878	}
				2879	if ( bExp == 0x7FF ) {
				2880	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2881	return packFloat64( zSign, 0, 0 );
				2882	}
				2883	if ( bExp == 0 ) {
				2884	if ( bSig == 0 ) {
				2885	if ( ( aExp \| aSig ) == 0 ) {
				2886	float_raise( float_flag_invalid STATUS_VAR);
				2887	return float64_default_nan;
				2888	}
				2889	float_raise( float_flag_divbyzero STATUS_VAR);
				2890	return packFloat64( zSign, 0x7FF, 0 );
				2891	}
				2892	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2893	}
				2894	if ( aExp == 0 ) {
				2895	if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
				2896	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2897	}
				2898	zExp = aExp - bExp + 0x3FD;
				2899	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<10;
				2900	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2901	if ( bSig <= ( aSig + aSig ) ) {
				2902	aSig >>= 1;
				2903	++zExp;
				2904	}
				2905	zSig = estimateDiv128To64( aSig, 0, bSig );
				2906	if ( ( zSig & 0x1FF ) <= 2 ) {
				2907	mul64To128( bSig, zSig, &term0, &term1 );
				2908	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				2909	while ( (sbits64) rem0 < 0 ) {
				2910	--zSig;
				2911	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				2912	}
				2913	zSig \|= ( rem1 != 0 );
				2914	}
				2915	return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
				2916
				2917	}
				2918
				2919	/*----------------------------------------------------------------------------
				2920	\| Returns the remainder of the double-precision floating-point value `a'
				2921	\| with respect to the corresponding value `b'. The operation is performed
				2922	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				2923	----------------------------------------------------------------------------/
				2924
				2925	float64 float64_rem( float64 a, float64 b STATUS_PARAM )
				2926	{
				2927	flag aSign, bSign, zSign;
				2928	int16 aExp, bExp, expDiff;
				2929	bits64 aSig, bSig;
				2930	bits64 q, alternateASig;
				2931	sbits64 sigMean;
				2932
				2933	aSig = extractFloat64Frac( a );
				2934	aExp = extractFloat64Exp( a );
				2935	aSign = extractFloat64Sign( a );
				2936	bSig = extractFloat64Frac( b );
				2937	bExp = extractFloat64Exp( b );
				2938	bSign = extractFloat64Sign( b );
				2939	if ( aExp == 0x7FF ) {
				2940	if ( aSig \|\| ( ( bExp == 0x7FF ) && bSig ) ) {
				2941	return propagateFloat64NaN( a, b STATUS_VAR );
				2942	}
				2943	float_raise( float_flag_invalid STATUS_VAR);
				2944	return float64_default_nan;
				2945	}
				2946	if ( bExp == 0x7FF ) {
				2947	if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
				2948	return a;
				2949	}
				2950	if ( bExp == 0 ) {
				2951	if ( bSig == 0 ) {
				2952	float_raise( float_flag_invalid STATUS_VAR);
				2953	return float64_default_nan;
				2954	}
				2955	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
				2956	}
				2957	if ( aExp == 0 ) {
				2958	if ( aSig == 0 ) return a;
				2959	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				2960	}
				2961	expDiff = aExp - bExp;
				2962	aSig = ( aSig \| LIT64( 0x0010000000000000 ) )<<11;
				2963	bSig = ( bSig \| LIT64( 0x0010000000000000 ) )<<11;
				2964	if ( expDiff < 0 ) {
				2965	if ( expDiff < -1 ) return a;
				2966	aSig >>= 1;
				2967	}
				2968	q = ( bSig <= aSig );
				2969	if ( q ) aSig -= bSig;
				2970	expDiff -= 64;
				2971	while ( 0 < expDiff ) {
				2972	q = estimateDiv128To64( aSig, 0, bSig );
				2973	q = ( 2 < q ) ? q - 2 : 0;
				2974	aSig = - ( ( bSig>>2 ) * q );
				2975	expDiff -= 62;
				2976	}
				2977	expDiff += 64;
				2978	if ( 0 < expDiff ) {
				2979	q = estimateDiv128To64( aSig, 0, bSig );
				2980	q = ( 2 < q ) ? q - 2 : 0;
				2981	q >>= 64 - expDiff;
				2982	bSig >>= 2;
				2983	aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
				2984	}
				2985	else {
				2986	aSig >>= 2;
				2987	bSig >>= 2;
				2988	}
				2989	do {
				2990	alternateASig = aSig;
				2991	++q;
				2992	aSig -= bSig;
				2993	} while ( 0 <= (sbits64) aSig );
				2994	sigMean = aSig + alternateASig;
				2995	if ( ( sigMean < 0 ) \|\| ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
				2996	aSig = alternateASig;
				2997	}
				2998	zSign = ( (sbits64) aSig < 0 );
				2999	if ( zSign ) aSig = - aSig;
				3000	return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
				3001
				3002	}
				3003
				3004	/*----------------------------------------------------------------------------
				3005	\| Returns the square root of the double-precision floating-point value `a'.
				3006	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3007	\| Floating-Point Arithmetic.
				3008	----------------------------------------------------------------------------/
				3009
				3010	float64 float64_sqrt( float64 a STATUS_PARAM )
				3011	{
				3012	flag aSign;
				3013	int16 aExp, zExp;
				3014	bits64 aSig, zSig, doubleZSig;
				3015	bits64 rem0, rem1, term0, term1;
				3016
				3017	aSig = extractFloat64Frac( a );
				3018	aExp = extractFloat64Exp( a );
				3019	aSign = extractFloat64Sign( a );
				3020	if ( aExp == 0x7FF ) {
				3021	if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
				3022	if ( ! aSign ) return a;
				3023	float_raise( float_flag_invalid STATUS_VAR);
				3024	return float64_default_nan;
				3025	}
				3026	if ( aSign ) {
				3027	if ( ( aExp \| aSig ) == 0 ) return a;
				3028	float_raise( float_flag_invalid STATUS_VAR);
				3029	return float64_default_nan;
				3030	}
				3031	if ( aExp == 0 ) {
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3032	if ( aSig == 0 ) return float64_zero;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3033	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3034	}
				3035	zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
				3036	aSig \|= LIT64( 0x0010000000000000 );
				3037	zSig = estimateSqrt32( aExp, aSig>>21 );
				3038	aSig <<= 9 - ( aExp & 1 );
				3039	zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
				3040	if ( ( zSig & 0x1FF ) <= 5 ) {
				3041	doubleZSig = zSig<<1;
				3042	mul64To128( zSig, zSig, &term0, &term1 );
				3043	sub128( aSig, 0, term0, term1, &rem0, &rem1 );
				3044	while ( (sbits64) rem0 < 0 ) {
				3045	--zSig;
				3046	doubleZSig -= 2;
				3047	add128( rem0, rem1, zSig>>63, doubleZSig \| 1, &rem0, &rem1 );
				3048	}
				3049	zSig \|= ( ( rem0 \| rem1 ) != 0 );
				3050	}
				3051	return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
				3052
				3053	}
				3054
				3055	/*----------------------------------------------------------------------------
aurel32	374dfc3	2009-02-05 13:42:47 +0000	[diff] [blame]	3056	\| Returns the binary log of the double-precision floating-point value `a'.
				3057	\| The operation is performed according to the IEC/IEEE Standard for Binary
				3058	\| Floating-Point Arithmetic.
				3059	----------------------------------------------------------------------------/
				3060	float64 float64_log2( float64 a STATUS_PARAM )
				3061	{
				3062	flag aSign, zSign;
				3063	int16 aExp;
				3064	bits64 aSig, aSig0, aSig1, zSig, i;
				3065
				3066	aSig = extractFloat64Frac( a );
				3067	aExp = extractFloat64Exp( a );
				3068	aSign = extractFloat64Sign( a );
				3069
				3070	if ( aExp == 0 ) {
				3071	if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
				3072	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
				3073	}
				3074	if ( aSign ) {
				3075	float_raise( float_flag_invalid STATUS_VAR);
				3076	return float64_default_nan;
				3077	}
				3078	if ( aExp == 0x7FF ) {
				3079	if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
				3080	return a;
				3081	}
				3082
				3083	aExp -= 0x3FF;
				3084	aSig \|= LIT64( 0x0010000000000000 );
				3085	zSign = aExp < 0;
				3086	zSig = (bits64)aExp << 52;
				3087	for (i = 1LL << 51; i > 0; i >>= 1) {
				3088	mul64To128( aSig, aSig, &aSig0, &aSig1 );
				3089	aSig = ( aSig0 << 12 ) \| ( aSig1 >> 52 );
				3090	if ( aSig & LIT64( 0x0020000000000000 ) ) {
				3091	aSig >>= 1;
				3092	zSig \|= i;
				3093	}
				3094	}
				3095
				3096	if ( zSign )
				3097	zSig = -zSig;
				3098	return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
				3099	}
				3100
				3101	/*----------------------------------------------------------------------------
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3102	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				3103	\| corresponding value `b', and 0 otherwise. The comparison is performed
				3104	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3105	----------------------------------------------------------------------------/
				3106
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3107	int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3108	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3109	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3110
				3111	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3112	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3113	) {
				3114	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3115	float_raise( float_flag_invalid STATUS_VAR);
				3116	}
				3117	return 0;
				3118	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3119	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3120	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3121	return ( av == bv ) \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3122
				3123	}
				3124
				3125	/*----------------------------------------------------------------------------
				3126	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3127	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				3128	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				3129	\| Arithmetic.
				3130	----------------------------------------------------------------------------/
				3131
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3132	int float64_le( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3133	{
				3134	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3135	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3136
				3137	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3138	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3139	) {
				3140	float_raise( float_flag_invalid STATUS_VAR);
				3141	return 0;
				3142	}
				3143	aSign = extractFloat64Sign( a );
				3144	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3145	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3146	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3147	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
				3148	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3149
				3150	}
				3151
				3152	/*----------------------------------------------------------------------------
				3153	\| Returns 1 if the double-precision floating-point value `a' is less than
				3154	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				3155	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3156	----------------------------------------------------------------------------/
				3157
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3158	int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3159	{
				3160	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3161	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3162
				3163	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3164	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3165	) {
				3166	float_raise( float_flag_invalid STATUS_VAR);
				3167	return 0;
				3168	}
				3169	aSign = extractFloat64Sign( a );
				3170	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3171	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3172	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3173	if ( aSign != bSign ) return aSign && ( (bits64) ( ( av \| bv )<<1 ) != 0 );
				3174	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3175
				3176	}
				3177
				3178	/*----------------------------------------------------------------------------
				3179	\| Returns 1 if the double-precision floating-point value `a' is equal to the
				3180	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
				3181	\| if either operand is a NaN. Otherwise, the comparison is performed
				3182	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3183	----------------------------------------------------------------------------/
				3184
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3185	int float64_eq_signaling( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3186	{
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3187	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3188
				3189	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3190	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3191	) {
				3192	float_raise( float_flag_invalid STATUS_VAR);
				3193	return 0;
				3194	}
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3195	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3196	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3197	return ( av == bv ) \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3198
				3199	}
				3200
				3201	/*----------------------------------------------------------------------------
				3202	\| Returns 1 if the double-precision floating-point value `a' is less than or
				3203	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				3204	\| cause an exception. Otherwise, the comparison is performed according to the
				3205	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3206	----------------------------------------------------------------------------/
				3207
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3208	int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3209	{
				3210	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3211	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3212
				3213	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3214	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3215	) {
				3216	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3217	float_raise( float_flag_invalid STATUS_VAR);
				3218	}
				3219	return 0;
				3220	}
				3221	aSign = extractFloat64Sign( a );
				3222	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3223	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3224	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3225	if ( aSign != bSign ) return aSign \|\| ( (bits64) ( ( av \| bv )<<1 ) == 0 );
				3226	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3227
				3228	}
				3229
				3230	/*----------------------------------------------------------------------------
				3231	\| Returns 1 if the double-precision floating-point value `a' is less than
				3232	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				3233	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				3234	\| Standard for Binary Floating-Point Arithmetic.
				3235	----------------------------------------------------------------------------/
				3236
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	3237	int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3238	{
				3239	flag aSign, bSign;
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3240	bits64 av, bv;
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3241
				3242	if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
				3243	\|\| ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
				3244	) {
				3245	if ( float64_is_signaling_nan( a ) \|\| float64_is_signaling_nan( b ) ) {
				3246	float_raise( float_flag_invalid STATUS_VAR);
				3247	}
				3248	return 0;
				3249	}
				3250	aSign = extractFloat64Sign( a );
				3251	bSign = extractFloat64Sign( b );
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3252	av = float64_val(a);
pbrook	a1b91bb	2007-11-21 15:32:12 +0000	[diff] [blame]	3253	bv = float64_val(b);
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	3254	if ( aSign != bSign ) return aSign && ( (bits64) ( ( av \| bv )<<1 ) != 0 );
				3255	return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	3256
				3257	}
				3258
				3259	#ifdef FLOATX80
				3260
				3261	/*----------------------------------------------------------------------------
				3262	\| Returns the result of converting the extended double-precision floating-
				3263	\| point value `a' to the 32-bit two's complement integer format. The
				3264	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3265	\| Floating-Point Arithmetic---which means in particular that the conversion
				3266	\| is rounded according to the current rounding mode. If `a' is a NaN, the
				3267	\| largest positive integer is returned. Otherwise, if the conversion
				3268	\| overflows, the largest integer with the same sign as `a' is returned.
				3269	----------------------------------------------------------------------------/
				3270
				3271	int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
				3272	{
				3273	flag aSign;
				3274	int32 aExp, shiftCount;
				3275	bits64 aSig;
				3276
				3277	aSig = extractFloatx80Frac( a );
				3278	aExp = extractFloatx80Exp( a );
				3279	aSign = extractFloatx80Sign( a );
				3280	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3281	shiftCount = 0x4037 - aExp;
				3282	if ( shiftCount <= 0 ) shiftCount = 1;
				3283	shift64RightJamming( aSig, shiftCount, &aSig );
				3284	return roundAndPackInt32( aSign, aSig STATUS_VAR );
				3285
				3286	}
				3287
				3288	/*----------------------------------------------------------------------------
				3289	\| Returns the result of converting the extended double-precision floating-
				3290	\| point value `a' to the 32-bit two's complement integer format. The
				3291	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3292	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3293	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3294	\| Otherwise, if the conversion overflows, the largest integer with the same
				3295	\| sign as `a' is returned.
				3296	----------------------------------------------------------------------------/
				3297
				3298	int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
				3299	{
				3300	flag aSign;
				3301	int32 aExp, shiftCount;
				3302	bits64 aSig, savedASig;
				3303	int32 z;
				3304
				3305	aSig = extractFloatx80Frac( a );
				3306	aExp = extractFloatx80Exp( a );
				3307	aSign = extractFloatx80Sign( a );
				3308	if ( 0x401E < aExp ) {
				3309	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
				3310	goto invalid;
				3311	}
				3312	else if ( aExp < 0x3FFF ) {
				3313	if ( aExp \|\| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3314	return 0;
				3315	}
				3316	shiftCount = 0x403E - aExp;
				3317	savedASig = aSig;
				3318	aSig >>= shiftCount;
				3319	z = aSig;
				3320	if ( aSign ) z = - z;
				3321	if ( ( z < 0 ) ^ aSign ) {
				3322	invalid:
				3323	float_raise( float_flag_invalid STATUS_VAR);
				3324	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				3325	}
				3326	if ( ( aSig<<shiftCount ) != savedASig ) {
				3327	STATUS(float_exception_flags) \|= float_flag_inexact;
				3328	}
				3329	return z;
				3330
				3331	}
				3332
				3333	/*----------------------------------------------------------------------------
				3334	\| Returns the result of converting the extended double-precision floating-
				3335	\| point value `a' to the 64-bit two's complement integer format. The
				3336	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3337	\| Floating-Point Arithmetic---which means in particular that the conversion
				3338	\| is rounded according to the current rounding mode. If `a' is a NaN,
				3339	\| the largest positive integer is returned. Otherwise, if the conversion
				3340	\| overflows, the largest integer with the same sign as `a' is returned.
				3341	----------------------------------------------------------------------------/
				3342
				3343	int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
				3344	{
				3345	flag aSign;
				3346	int32 aExp, shiftCount;
				3347	bits64 aSig, aSigExtra;
				3348
				3349	aSig = extractFloatx80Frac( a );
				3350	aExp = extractFloatx80Exp( a );
				3351	aSign = extractFloatx80Sign( a );
				3352	shiftCount = 0x403E - aExp;
				3353	if ( shiftCount <= 0 ) {
				3354	if ( shiftCount ) {
				3355	float_raise( float_flag_invalid STATUS_VAR);
				3356	if ( ! aSign
				3357	\|\| ( ( aExp == 0x7FFF )
				3358	&& ( aSig != LIT64( 0x8000000000000000 ) ) )
				3359	) {
				3360	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3361	}
				3362	return (sbits64) LIT64( 0x8000000000000000 );
				3363	}
				3364	aSigExtra = 0;
				3365	}
				3366	else {
				3367	shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
				3368	}
				3369	return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
				3370
				3371	}
				3372
				3373	/*----------------------------------------------------------------------------
				3374	\| Returns the result of converting the extended double-precision floating-
				3375	\| point value `a' to the 64-bit two's complement integer format. The
				3376	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3377	\| Floating-Point Arithmetic, except that the conversion is always rounded
				3378	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
				3379	\| Otherwise, if the conversion overflows, the largest integer with the same
				3380	\| sign as `a' is returned.
				3381	----------------------------------------------------------------------------/
				3382
				3383	int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
				3384	{
				3385	flag aSign;
				3386	int32 aExp, shiftCount;
				3387	bits64 aSig;
				3388	int64 z;
				3389
				3390	aSig = extractFloatx80Frac( a );
				3391	aExp = extractFloatx80Exp( a );
				3392	aSign = extractFloatx80Sign( a );
				3393	shiftCount = aExp - 0x403E;
				3394	if ( 0 <= shiftCount ) {
				3395	aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
				3396	if ( ( a.high != 0xC03E ) \|\| aSig ) {
				3397	float_raise( float_flag_invalid STATUS_VAR);
				3398	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && aSig ) ) {
				3399	return LIT64( 0x7FFFFFFFFFFFFFFF );
				3400	}
				3401	}
				3402	return (sbits64) LIT64( 0x8000000000000000 );
				3403	}
				3404	else if ( aExp < 0x3FFF ) {
				3405	if ( aExp \| aSig ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3406	return 0;
				3407	}
				3408	z = aSig>>( - shiftCount );
				3409	if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
				3410	STATUS(float_exception_flags) \|= float_flag_inexact;
				3411	}
				3412	if ( aSign ) z = - z;
				3413	return z;
				3414
				3415	}
				3416
				3417	/*----------------------------------------------------------------------------
				3418	\| Returns the result of converting the extended double-precision floating-
				3419	\| point value `a' to the single-precision floating-point format. The
				3420	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3421	\| Floating-Point Arithmetic.
				3422	----------------------------------------------------------------------------/
				3423
				3424	float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
				3425	{
				3426	flag aSign;
				3427	int32 aExp;
				3428	bits64 aSig;
				3429
				3430	aSig = extractFloatx80Frac( a );
				3431	aExp = extractFloatx80Exp( a );
				3432	aSign = extractFloatx80Sign( a );
				3433	if ( aExp == 0x7FFF ) {
				3434	if ( (bits64) ( aSig<<1 ) ) {
				3435	return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) );
				3436	}
				3437	return packFloat32( aSign, 0xFF, 0 );
				3438	}
				3439	shift64RightJamming( aSig, 33, &aSig );
				3440	if ( aExp \|\| aSig ) aExp -= 0x3F81;
				3441	return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
				3442
				3443	}
				3444
				3445	/*----------------------------------------------------------------------------
				3446	\| Returns the result of converting the extended double-precision floating-
				3447	\| point value `a' to the double-precision floating-point format. The
				3448	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3449	\| Floating-Point Arithmetic.
				3450	----------------------------------------------------------------------------/
				3451
				3452	float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
				3453	{
				3454	flag aSign;
				3455	int32 aExp;
				3456	bits64 aSig, zSig;
				3457
				3458	aSig = extractFloatx80Frac( a );
				3459	aExp = extractFloatx80Exp( a );
				3460	aSign = extractFloatx80Sign( a );
				3461	if ( aExp == 0x7FFF ) {
				3462	if ( (bits64) ( aSig<<1 ) ) {
				3463	return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) );
				3464	}
				3465	return packFloat64( aSign, 0x7FF, 0 );
				3466	}
				3467	shift64RightJamming( aSig, 1, &zSig );
				3468	if ( aExp \|\| aSig ) aExp -= 0x3C01;
				3469	return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
				3470
				3471	}
				3472
				3473	#ifdef FLOAT128
				3474
				3475	/*----------------------------------------------------------------------------
				3476	\| Returns the result of converting the extended double-precision floating-
				3477	\| point value `a' to the quadruple-precision floating-point format. The
				3478	\| conversion is performed according to the IEC/IEEE Standard for Binary
				3479	\| Floating-Point Arithmetic.
				3480	----------------------------------------------------------------------------/
				3481
				3482	float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
				3483	{
				3484	flag aSign;
				3485	int16 aExp;
				3486	bits64 aSig, zSig0, zSig1;
				3487
				3488	aSig = extractFloatx80Frac( a );
				3489	aExp = extractFloatx80Exp( a );
				3490	aSign = extractFloatx80Sign( a );
				3491	if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
				3492	return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) );
				3493	}
				3494	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
				3495	return packFloat128( aSign, aExp, zSig0, zSig1 );
				3496
				3497	}
				3498
				3499	#endif
				3500
				3501	/*----------------------------------------------------------------------------
				3502	\| Rounds the extended double-precision floating-point value `a' to an integer,
				3503	\| and returns the result as an extended quadruple-precision floating-point
				3504	\| value. The operation is performed according to the IEC/IEEE Standard for
				3505	\| Binary Floating-Point Arithmetic.
				3506	----------------------------------------------------------------------------/
				3507
				3508	floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
				3509	{
				3510	flag aSign;
				3511	int32 aExp;
				3512	bits64 lastBitMask, roundBitsMask;
				3513	int8 roundingMode;
				3514	floatx80 z;
				3515
				3516	aExp = extractFloatx80Exp( a );
				3517	if ( 0x403E <= aExp ) {
				3518	if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
				3519	return propagateFloatx80NaN( a, a STATUS_VAR );
				3520	}
				3521	return a;
				3522	}
				3523	if ( aExp < 0x3FFF ) {
				3524	if ( ( aExp == 0 )
				3525	&& ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
				3526	return a;
				3527	}
				3528	STATUS(float_exception_flags) \|= float_flag_inexact;
				3529	aSign = extractFloatx80Sign( a );
				3530	switch ( STATUS(float_rounding_mode) ) {
				3531	case float_round_nearest_even:
				3532	if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
				3533	) {
				3534	return
				3535	packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3536	}
				3537	break;
				3538	case float_round_down:
				3539	return
				3540	aSign ?
				3541	packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
				3542	: packFloatx80( 0, 0, 0 );
				3543	case float_round_up:
				3544	return
				3545	aSign ? packFloatx80( 1, 0, 0 )
				3546	: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
				3547	}
				3548	return packFloatx80( aSign, 0, 0 );
				3549	}
				3550	lastBitMask = 1;
				3551	lastBitMask <<= 0x403E - aExp;
				3552	roundBitsMask = lastBitMask - 1;
				3553	z = a;
				3554	roundingMode = STATUS(float_rounding_mode);
				3555	if ( roundingMode == float_round_nearest_even ) {
				3556	z.low += lastBitMask>>1;
				3557	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				3558	}
				3559	else if ( roundingMode != float_round_to_zero ) {
				3560	if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
				3561	z.low += roundBitsMask;
				3562	}
				3563	}
				3564	z.low &= ~ roundBitsMask;
				3565	if ( z.low == 0 ) {
				3566	++z.high;
				3567	z.low = LIT64( 0x8000000000000000 );
				3568	}
				3569	if ( z.low != a.low ) STATUS(float_exception_flags) \|= float_flag_inexact;
				3570	return z;
				3571
				3572	}
				3573
				3574	/*----------------------------------------------------------------------------
				3575	\| Returns the result of adding the absolute values of the extended double-
				3576	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
				3577	\| negated before being returned. `zSign' is ignored if the result is a NaN.
				3578	\| The addition is performed according to the IEC/IEEE Standard for Binary
				3579	\| Floating-Point Arithmetic.
				3580	----------------------------------------------------------------------------/
				3581
				3582	static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
				3583	{
				3584	int32 aExp, bExp, zExp;
				3585	bits64 aSig, bSig, zSig0, zSig1;
				3586	int32 expDiff;
				3587
				3588	aSig = extractFloatx80Frac( a );
				3589	aExp = extractFloatx80Exp( a );
				3590	bSig = extractFloatx80Frac( b );
				3591	bExp = extractFloatx80Exp( b );
				3592	expDiff = aExp - bExp;
				3593	if ( 0 < expDiff ) {
				3594	if ( aExp == 0x7FFF ) {
				3595	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3596	return a;
				3597	}
				3598	if ( bExp == 0 ) --expDiff;
				3599	shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3600	zExp = aExp;
				3601	}
				3602	else if ( expDiff < 0 ) {
				3603	if ( bExp == 0x7FFF ) {
				3604	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3605	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3606	}
				3607	if ( aExp == 0 ) ++expDiff;
				3608	shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3609	zExp = bExp;
				3610	}
				3611	else {
				3612	if ( aExp == 0x7FFF ) {
				3613	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3614	return propagateFloatx80NaN( a, b STATUS_VAR );
				3615	}
				3616	return a;
				3617	}
				3618	zSig1 = 0;
				3619	zSig0 = aSig + bSig;
				3620	if ( aExp == 0 ) {
				3621	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
				3622	goto roundAndPack;
				3623	}
				3624	zExp = aExp;
				3625	goto shiftRight1;
				3626	}
				3627	zSig0 = aSig + bSig;
				3628	if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
				3629	shiftRight1:
				3630	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3631	zSig0 \|= LIT64( 0x8000000000000000 );
				3632	++zExp;
				3633	roundAndPack:
				3634	return
				3635	roundAndPackFloatx80(
				3636	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3637
				3638	}
				3639
				3640	/*----------------------------------------------------------------------------
				3641	\| Returns the result of subtracting the absolute values of the extended
				3642	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
				3643	\| difference is negated before being returned. `zSign' is ignored if the
				3644	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				3645	\| Standard for Binary Floating-Point Arithmetic.
				3646	----------------------------------------------------------------------------/
				3647
				3648	static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
				3649	{
				3650	int32 aExp, bExp, zExp;
				3651	bits64 aSig, bSig, zSig0, zSig1;
				3652	int32 expDiff;
				3653	floatx80 z;
				3654
				3655	aSig = extractFloatx80Frac( a );
				3656	aExp = extractFloatx80Exp( a );
				3657	bSig = extractFloatx80Frac( b );
				3658	bExp = extractFloatx80Exp( b );
				3659	expDiff = aExp - bExp;
				3660	if ( 0 < expDiff ) goto aExpBigger;
				3661	if ( expDiff < 0 ) goto bExpBigger;
				3662	if ( aExp == 0x7FFF ) {
				3663	if ( (bits64) ( ( aSig \| bSig )<<1 ) ) {
				3664	return propagateFloatx80NaN( a, b STATUS_VAR );
				3665	}
				3666	float_raise( float_flag_invalid STATUS_VAR);
				3667	z.low = floatx80_default_nan_low;
				3668	z.high = floatx80_default_nan_high;
				3669	return z;
				3670	}
				3671	if ( aExp == 0 ) {
				3672	aExp = 1;
				3673	bExp = 1;
				3674	}
				3675	zSig1 = 0;
				3676	if ( bSig < aSig ) goto aBigger;
				3677	if ( aSig < bSig ) goto bBigger;
				3678	return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
				3679	bExpBigger:
				3680	if ( bExp == 0x7FFF ) {
				3681	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3682	return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3683	}
				3684	if ( aExp == 0 ) ++expDiff;
				3685	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
				3686	bBigger:
				3687	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
				3688	zExp = bExp;
				3689	zSign ^= 1;
				3690	goto normalizeRoundAndPack;
				3691	aExpBigger:
				3692	if ( aExp == 0x7FFF ) {
				3693	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3694	return a;
				3695	}
				3696	if ( bExp == 0 ) --expDiff;
				3697	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
				3698	aBigger:
				3699	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
				3700	zExp = aExp;
				3701	normalizeRoundAndPack:
				3702	return
				3703	normalizeRoundAndPackFloatx80(
				3704	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3705
				3706	}
				3707
				3708	/*----------------------------------------------------------------------------
				3709	\| Returns the result of adding the extended double-precision floating-point
				3710	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				3711	\| Standard for Binary Floating-Point Arithmetic.
				3712	----------------------------------------------------------------------------/
				3713
				3714	floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
				3715	{
				3716	flag aSign, bSign;
				3717
				3718	aSign = extractFloatx80Sign( a );
				3719	bSign = extractFloatx80Sign( b );
				3720	if ( aSign == bSign ) {
				3721	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3722	}
				3723	else {
				3724	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3725	}
				3726
				3727	}
				3728
				3729	/*----------------------------------------------------------------------------
				3730	\| Returns the result of subtracting the extended double-precision floating-
				3731	\| point values `a' and `b'. The operation is performed according to the
				3732	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3733	----------------------------------------------------------------------------/
				3734
				3735	floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
				3736	{
				3737	flag aSign, bSign;
				3738
				3739	aSign = extractFloatx80Sign( a );
				3740	bSign = extractFloatx80Sign( b );
				3741	if ( aSign == bSign ) {
				3742	return subFloatx80Sigs( a, b, aSign STATUS_VAR );
				3743	}
				3744	else {
				3745	return addFloatx80Sigs( a, b, aSign STATUS_VAR );
				3746	}
				3747
				3748	}
				3749
				3750	/*----------------------------------------------------------------------------
				3751	\| Returns the result of multiplying the extended double-precision floating-
				3752	\| point values `a' and `b'. The operation is performed according to the
				3753	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3754	----------------------------------------------------------------------------/
				3755
				3756	floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
				3757	{
				3758	flag aSign, bSign, zSign;
				3759	int32 aExp, bExp, zExp;
				3760	bits64 aSig, bSig, zSig0, zSig1;
				3761	floatx80 z;
				3762
				3763	aSig = extractFloatx80Frac( a );
				3764	aExp = extractFloatx80Exp( a );
				3765	aSign = extractFloatx80Sign( a );
				3766	bSig = extractFloatx80Frac( b );
				3767	bExp = extractFloatx80Exp( b );
				3768	bSign = extractFloatx80Sign( b );
				3769	zSign = aSign ^ bSign;
				3770	if ( aExp == 0x7FFF ) {
				3771	if ( (bits64) ( aSig<<1 )
				3772	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3773	return propagateFloatx80NaN( a, b STATUS_VAR );
				3774	}
				3775	if ( ( bExp \| bSig ) == 0 ) goto invalid;
				3776	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3777	}
				3778	if ( bExp == 0x7FFF ) {
				3779	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3780	if ( ( aExp \| aSig ) == 0 ) {
				3781	invalid:
				3782	float_raise( float_flag_invalid STATUS_VAR);
				3783	z.low = floatx80_default_nan_low;
				3784	z.high = floatx80_default_nan_high;
				3785	return z;
				3786	}
				3787	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3788	}
				3789	if ( aExp == 0 ) {
				3790	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3791	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3792	}
				3793	if ( bExp == 0 ) {
				3794	if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3795	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3796	}
				3797	zExp = aExp + bExp - 0x3FFE;
				3798	mul64To128( aSig, bSig, &zSig0, &zSig1 );
				3799	if ( 0 < (sbits64) zSig0 ) {
				3800	shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
				3801	--zExp;
				3802	}
				3803	return
				3804	roundAndPackFloatx80(
				3805	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3806
				3807	}
				3808
				3809	/*----------------------------------------------------------------------------
				3810	\| Returns the result of dividing the extended double-precision floating-point
				3811	\| value `a' by the corresponding value `b'. The operation is performed
				3812	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3813	----------------------------------------------------------------------------/
				3814
				3815	floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
				3816	{
				3817	flag aSign, bSign, zSign;
				3818	int32 aExp, bExp, zExp;
				3819	bits64 aSig, bSig, zSig0, zSig1;
				3820	bits64 rem0, rem1, rem2, term0, term1, term2;
				3821	floatx80 z;
				3822
				3823	aSig = extractFloatx80Frac( a );
				3824	aExp = extractFloatx80Exp( a );
				3825	aSign = extractFloatx80Sign( a );
				3826	bSig = extractFloatx80Frac( b );
				3827	bExp = extractFloatx80Exp( b );
				3828	bSign = extractFloatx80Sign( b );
				3829	zSign = aSign ^ bSign;
				3830	if ( aExp == 0x7FFF ) {
				3831	if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3832	if ( bExp == 0x7FFF ) {
				3833	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3834	goto invalid;
				3835	}
				3836	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3837	}
				3838	if ( bExp == 0x7FFF ) {
				3839	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3840	return packFloatx80( zSign, 0, 0 );
				3841	}
				3842	if ( bExp == 0 ) {
				3843	if ( bSig == 0 ) {
				3844	if ( ( aExp \| aSig ) == 0 ) {
				3845	invalid:
				3846	float_raise( float_flag_invalid STATUS_VAR);
				3847	z.low = floatx80_default_nan_low;
				3848	z.high = floatx80_default_nan_high;
				3849	return z;
				3850	}
				3851	float_raise( float_flag_divbyzero STATUS_VAR);
				3852	return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				3853	}
				3854	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3855	}
				3856	if ( aExp == 0 ) {
				3857	if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
				3858	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
				3859	}
				3860	zExp = aExp - bExp + 0x3FFE;
				3861	rem1 = 0;
				3862	if ( bSig <= aSig ) {
				3863	shift128Right( aSig, 0, 1, &aSig, &rem1 );
				3864	++zExp;
				3865	}
				3866	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
				3867	mul64To128( bSig, zSig0, &term0, &term1 );
				3868	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
				3869	while ( (sbits64) rem0 < 0 ) {
				3870	--zSig0;
				3871	add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
				3872	}
				3873	zSig1 = estimateDiv128To64( rem1, 0, bSig );
				3874	if ( (bits64) ( zSig1<<1 ) <= 8 ) {
				3875	mul64To128( bSig, zSig1, &term1, &term2 );
				3876	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				3877	while ( (sbits64) rem1 < 0 ) {
				3878	--zSig1;
				3879	add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
				3880	}
				3881	zSig1 \|= ( ( rem1 \| rem2 ) != 0 );
				3882	}
				3883	return
				3884	roundAndPackFloatx80(
				3885	STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
				3886
				3887	}
				3888
				3889	/*----------------------------------------------------------------------------
				3890	\| Returns the remainder of the extended double-precision floating-point value
				3891	\| `a' with respect to the corresponding value `b'. The operation is performed
				3892	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				3893	----------------------------------------------------------------------------/
				3894
				3895	floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
				3896	{
				3897	flag aSign, bSign, zSign;
				3898	int32 aExp, bExp, expDiff;
				3899	bits64 aSig0, aSig1, bSig;
				3900	bits64 q, term0, term1, alternateASig0, alternateASig1;
				3901	floatx80 z;
				3902
				3903	aSig0 = extractFloatx80Frac( a );
				3904	aExp = extractFloatx80Exp( a );
				3905	aSign = extractFloatx80Sign( a );
				3906	bSig = extractFloatx80Frac( b );
				3907	bExp = extractFloatx80Exp( b );
				3908	bSign = extractFloatx80Sign( b );
				3909	if ( aExp == 0x7FFF ) {
				3910	if ( (bits64) ( aSig0<<1 )
				3911	\|\| ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
				3912	return propagateFloatx80NaN( a, b STATUS_VAR );
				3913	}
				3914	goto invalid;
				3915	}
				3916	if ( bExp == 0x7FFF ) {
				3917	if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
				3918	return a;
				3919	}
				3920	if ( bExp == 0 ) {
				3921	if ( bSig == 0 ) {
				3922	invalid:
				3923	float_raise( float_flag_invalid STATUS_VAR);
				3924	z.low = floatx80_default_nan_low;
				3925	z.high = floatx80_default_nan_high;
				3926	return z;
				3927	}
				3928	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
				3929	}
				3930	if ( aExp == 0 ) {
				3931	if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
				3932	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				3933	}
				3934	bSig \|= LIT64( 0x8000000000000000 );
				3935	zSign = aSign;
				3936	expDiff = aExp - bExp;
				3937	aSig1 = 0;
				3938	if ( expDiff < 0 ) {
				3939	if ( expDiff < -1 ) return a;
				3940	shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
				3941	expDiff = 0;
				3942	}
				3943	q = ( bSig <= aSig0 );
				3944	if ( q ) aSig0 -= bSig;
				3945	expDiff -= 64;
				3946	while ( 0 < expDiff ) {
				3947	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3948	q = ( 2 < q ) ? q - 2 : 0;
				3949	mul64To128( bSig, q, &term0, &term1 );
				3950	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3951	shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
				3952	expDiff -= 62;
				3953	}
				3954	expDiff += 64;
				3955	if ( 0 < expDiff ) {
				3956	q = estimateDiv128To64( aSig0, aSig1, bSig );
				3957	q = ( 2 < q ) ? q - 2 : 0;
				3958	q >>= 64 - expDiff;
				3959	mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
				3960	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3961	shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
				3962	while ( le128( term0, term1, aSig0, aSig1 ) ) {
				3963	++q;
				3964	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
				3965	}
				3966	}
				3967	else {
				3968	term1 = 0;
				3969	term0 = bSig;
				3970	}
				3971	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
				3972	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3973	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
				3974	&& ( q & 1 ) )
				3975	) {
				3976	aSig0 = alternateASig0;
				3977	aSig1 = alternateASig1;
				3978	zSign = ! zSign;
				3979	}
				3980	return
				3981	normalizeRoundAndPackFloatx80(
				3982	80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
				3983
				3984	}
				3985
				3986	/*----------------------------------------------------------------------------
				3987	\| Returns the square root of the extended double-precision floating-point
				3988	\| value `a'. The operation is performed according to the IEC/IEEE Standard
				3989	\| for Binary Floating-Point Arithmetic.
				3990	----------------------------------------------------------------------------/
				3991
				3992	floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
				3993	{
				3994	flag aSign;
				3995	int32 aExp, zExp;
				3996	bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
				3997	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				3998	floatx80 z;
				3999
				4000	aSig0 = extractFloatx80Frac( a );
				4001	aExp = extractFloatx80Exp( a );
				4002	aSign = extractFloatx80Sign( a );
				4003	if ( aExp == 0x7FFF ) {
				4004	if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
				4005	if ( ! aSign ) return a;
				4006	goto invalid;
				4007	}
				4008	if ( aSign ) {
				4009	if ( ( aExp \| aSig0 ) == 0 ) return a;
				4010	invalid:
				4011	float_raise( float_flag_invalid STATUS_VAR);
				4012	z.low = floatx80_default_nan_low;
				4013	z.high = floatx80_default_nan_high;
				4014	return z;
				4015	}
				4016	if ( aExp == 0 ) {
				4017	if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
				4018	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
				4019	}
				4020	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
				4021	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
				4022	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
				4023	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				4024	doubleZSig0 = zSig0<<1;
				4025	mul64To128( zSig0, zSig0, &term0, &term1 );
				4026	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				4027	while ( (sbits64) rem0 < 0 ) {
				4028	--zSig0;
				4029	doubleZSig0 -= 2;
				4030	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				4031	}
				4032	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				4033	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
				4034	if ( zSig1 == 0 ) zSig1 = 1;
				4035	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				4036	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				4037	mul64To128( zSig1, zSig1, &term2, &term3 );
				4038	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				4039	while ( (sbits64) rem1 < 0 ) {
				4040	--zSig1;
				4041	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				4042	term3 \|= 1;
				4043	term2 \|= doubleZSig0;
				4044	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				4045	}
				4046	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				4047	}
				4048	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
				4049	zSig0 \|= doubleZSig0;
				4050	return
				4051	roundAndPackFloatx80(
				4052	STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
				4053
				4054	}
				4055
				4056	/*----------------------------------------------------------------------------
				4057	\| Returns 1 if the extended double-precision floating-point value `a' is
				4058	\| equal to the corresponding value `b', and 0 otherwise. The comparison is
				4059	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
				4060	\| Arithmetic.
				4061	----------------------------------------------------------------------------/
				4062
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4063	int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4064	{
				4065
				4066	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4067	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4068	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4069	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4070	) {
				4071	if ( floatx80_is_signaling_nan( a )
				4072	\|\| floatx80_is_signaling_nan( b ) ) {
				4073	float_raise( float_flag_invalid STATUS_VAR);
				4074	}
				4075	return 0;
				4076	}
				4077	return
				4078	( a.low == b.low )
				4079	&& ( ( a.high == b.high )
				4080	\|\| ( ( a.low == 0 )
				4081	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				4082	);
				4083
				4084	}
				4085
				4086	/*----------------------------------------------------------------------------
				4087	\| Returns 1 if the extended double-precision floating-point value `a' is
				4088	\| less than or equal to the corresponding value `b', and 0 otherwise. The
				4089	\| comparison is performed according to the IEC/IEEE Standard for Binary
				4090	\| Floating-Point Arithmetic.
				4091	----------------------------------------------------------------------------/
				4092
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4093	int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4094	{
				4095	flag aSign, bSign;
				4096
				4097	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4098	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4099	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4100	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4101	) {
				4102	float_raise( float_flag_invalid STATUS_VAR);
				4103	return 0;
				4104	}
				4105	aSign = extractFloatx80Sign( a );
				4106	bSign = extractFloatx80Sign( b );
				4107	if ( aSign != bSign ) {
				4108	return
				4109	aSign
				4110	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4111	== 0 );
				4112	}
				4113	return
				4114	aSign ? le128( b.high, b.low, a.high, a.low )
				4115	: le128( a.high, a.low, b.high, b.low );
				4116
				4117	}
				4118
				4119	/*----------------------------------------------------------------------------
				4120	\| Returns 1 if the extended double-precision floating-point value `a' is
				4121	\| less than the corresponding value `b', and 0 otherwise. The comparison
				4122	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4123	\| Arithmetic.
				4124	----------------------------------------------------------------------------/
				4125
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4126	int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4127	{
				4128	flag aSign, bSign;
				4129
				4130	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4131	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4132	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4133	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4134	) {
				4135	float_raise( float_flag_invalid STATUS_VAR);
				4136	return 0;
				4137	}
				4138	aSign = extractFloatx80Sign( a );
				4139	bSign = extractFloatx80Sign( b );
				4140	if ( aSign != bSign ) {
				4141	return
				4142	aSign
				4143	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4144	!= 0 );
				4145	}
				4146	return
				4147	aSign ? lt128( b.high, b.low, a.high, a.low )
				4148	: lt128( a.high, a.low, b.high, b.low );
				4149
				4150	}
				4151
				4152	/*----------------------------------------------------------------------------
				4153	\| Returns 1 if the extended double-precision floating-point value `a' is equal
				4154	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
				4155	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				4156	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4157	----------------------------------------------------------------------------/
				4158
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4159	int floatx80_eq_signaling( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4160	{
				4161
				4162	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4163	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4164	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4165	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4166	) {
				4167	float_raise( float_flag_invalid STATUS_VAR);
				4168	return 0;
				4169	}
				4170	return
				4171	( a.low == b.low )
				4172	&& ( ( a.high == b.high )
				4173	\|\| ( ( a.low == 0 )
				4174	&& ( (bits16) ( ( a.high \| b.high )<<1 ) == 0 ) )
				4175	);
				4176
				4177	}
				4178
				4179	/*----------------------------------------------------------------------------
				4180	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4181	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
				4182	\| do not cause an exception. Otherwise, the comparison is performed according
				4183	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4184	----------------------------------------------------------------------------/
				4185
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4186	int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4187	{
				4188	flag aSign, bSign;
				4189
				4190	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4191	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4192	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4193	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4194	) {
				4195	if ( floatx80_is_signaling_nan( a )
				4196	\|\| floatx80_is_signaling_nan( b ) ) {
				4197	float_raise( float_flag_invalid STATUS_VAR);
				4198	}
				4199	return 0;
				4200	}
				4201	aSign = extractFloatx80Sign( a );
				4202	bSign = extractFloatx80Sign( b );
				4203	if ( aSign != bSign ) {
				4204	return
				4205	aSign
				4206	\|\| ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4207	== 0 );
				4208	}
				4209	return
				4210	aSign ? le128( b.high, b.low, a.high, a.low )
				4211	: le128( a.high, a.low, b.high, b.low );
				4212
				4213	}
				4214
				4215	/*----------------------------------------------------------------------------
				4216	\| Returns 1 if the extended double-precision floating-point value `a' is less
				4217	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
				4218	\| an exception. Otherwise, the comparison is performed according to the
				4219	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4220	----------------------------------------------------------------------------/
				4221
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	4222	int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4223	{
				4224	flag aSign, bSign;
				4225
				4226	if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
				4227	&& (bits64) ( extractFloatx80Frac( a )<<1 ) )
				4228	\|\| ( ( extractFloatx80Exp( b ) == 0x7FFF )
				4229	&& (bits64) ( extractFloatx80Frac( b )<<1 ) )
				4230	) {
				4231	if ( floatx80_is_signaling_nan( a )
				4232	\|\| floatx80_is_signaling_nan( b ) ) {
				4233	float_raise( float_flag_invalid STATUS_VAR);
				4234	}
				4235	return 0;
				4236	}
				4237	aSign = extractFloatx80Sign( a );
				4238	bSign = extractFloatx80Sign( b );
				4239	if ( aSign != bSign ) {
				4240	return
				4241	aSign
				4242	&& ( ( ( (bits16) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				4243	!= 0 );
				4244	}
				4245	return
				4246	aSign ? lt128( b.high, b.low, a.high, a.low )
				4247	: lt128( a.high, a.low, b.high, b.low );
				4248
				4249	}
				4250
				4251	#endif
				4252
				4253	#ifdef FLOAT128
				4254
				4255	/*----------------------------------------------------------------------------
				4256	\| Returns the result of converting the quadruple-precision floating-point
				4257	\| value `a' to the 32-bit two's complement integer format. The conversion
				4258	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4259	\| Arithmetic---which means in particular that the conversion is rounded
				4260	\| according to the current rounding mode. If `a' is a NaN, the largest
				4261	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4262	\| largest integer with the same sign as `a' is returned.
				4263	----------------------------------------------------------------------------/
				4264
				4265	int32 float128_to_int32( float128 a STATUS_PARAM )
				4266	{
				4267	flag aSign;
				4268	int32 aExp, shiftCount;
				4269	bits64 aSig0, aSig1;
				4270
				4271	aSig1 = extractFloat128Frac1( a );
				4272	aSig0 = extractFloat128Frac0( a );
				4273	aExp = extractFloat128Exp( a );
				4274	aSign = extractFloat128Sign( a );
				4275	if ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) aSign = 0;
				4276	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4277	aSig0 \|= ( aSig1 != 0 );
				4278	shiftCount = 0x4028 - aExp;
				4279	if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
				4280	return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
				4281
				4282	}
				4283
				4284	/*----------------------------------------------------------------------------
				4285	\| Returns the result of converting the quadruple-precision floating-point
				4286	\| value `a' to the 32-bit two's complement integer format. The conversion
				4287	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4288	\| Arithmetic, except that the conversion is always rounded toward zero. If
				4289	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
				4290	\| conversion overflows, the largest integer with the same sign as `a' is
				4291	\| returned.
				4292	----------------------------------------------------------------------------/
				4293
				4294	int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
				4295	{
				4296	flag aSign;
				4297	int32 aExp, shiftCount;
				4298	bits64 aSig0, aSig1, savedASig;
				4299	int32 z;
				4300
				4301	aSig1 = extractFloat128Frac1( a );
				4302	aSig0 = extractFloat128Frac0( a );
				4303	aExp = extractFloat128Exp( a );
				4304	aSign = extractFloat128Sign( a );
				4305	aSig0 \|= ( aSig1 != 0 );
				4306	if ( 0x401E < aExp ) {
				4307	if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
				4308	goto invalid;
				4309	}
				4310	else if ( aExp < 0x3FFF ) {
				4311	if ( aExp \|\| aSig0 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4312	return 0;
				4313	}
				4314	aSig0 \|= LIT64( 0x0001000000000000 );
				4315	shiftCount = 0x402F - aExp;
				4316	savedASig = aSig0;
				4317	aSig0 >>= shiftCount;
				4318	z = aSig0;
				4319	if ( aSign ) z = - z;
				4320	if ( ( z < 0 ) ^ aSign ) {
				4321	invalid:
				4322	float_raise( float_flag_invalid STATUS_VAR);
				4323	return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
				4324	}
				4325	if ( ( aSig0<<shiftCount ) != savedASig ) {
				4326	STATUS(float_exception_flags) \|= float_flag_inexact;
				4327	}
				4328	return z;
				4329
				4330	}
				4331
				4332	/*----------------------------------------------------------------------------
				4333	\| Returns the result of converting the quadruple-precision floating-point
				4334	\| value `a' to the 64-bit two's complement integer format. The conversion
				4335	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4336	\| Arithmetic---which means in particular that the conversion is rounded
				4337	\| according to the current rounding mode. If `a' is a NaN, the largest
				4338	\| positive integer is returned. Otherwise, if the conversion overflows, the
				4339	\| largest integer with the same sign as `a' is returned.
				4340	----------------------------------------------------------------------------/
				4341
				4342	int64 float128_to_int64( float128 a STATUS_PARAM )
				4343	{
				4344	flag aSign;
				4345	int32 aExp, shiftCount;
				4346	bits64 aSig0, aSig1;
				4347
				4348	aSig1 = extractFloat128Frac1( a );
				4349	aSig0 = extractFloat128Frac0( a );
				4350	aExp = extractFloat128Exp( a );
				4351	aSign = extractFloat128Sign( a );
				4352	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4353	shiftCount = 0x402F - aExp;
				4354	if ( shiftCount <= 0 ) {
				4355	if ( 0x403E < aExp ) {
				4356	float_raise( float_flag_invalid STATUS_VAR);
				4357	if ( ! aSign
				4358	\|\| ( ( aExp == 0x7FFF )
				4359	&& ( aSig1 \|\| ( aSig0 != LIT64( 0x0001000000000000 ) ) )
				4360	)
				4361	) {
				4362	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4363	}
				4364	return (sbits64) LIT64( 0x8000000000000000 );
				4365	}
				4366	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
				4367	}
				4368	else {
				4369	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
				4370	}
				4371	return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
				4372
				4373	}
				4374
				4375	/*----------------------------------------------------------------------------
				4376	\| Returns the result of converting the quadruple-precision floating-point
				4377	\| value `a' to the 64-bit two's complement integer format. The conversion
				4378	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4379	\| Arithmetic, except that the conversion is always rounded toward zero.
				4380	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
				4381	\| the conversion overflows, the largest integer with the same sign as `a' is
				4382	\| returned.
				4383	----------------------------------------------------------------------------/
				4384
				4385	int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
				4386	{
				4387	flag aSign;
				4388	int32 aExp, shiftCount;
				4389	bits64 aSig0, aSig1;
				4390	int64 z;
				4391
				4392	aSig1 = extractFloat128Frac1( a );
				4393	aSig0 = extractFloat128Frac0( a );
				4394	aExp = extractFloat128Exp( a );
				4395	aSign = extractFloat128Sign( a );
				4396	if ( aExp ) aSig0 \|= LIT64( 0x0001000000000000 );
				4397	shiftCount = aExp - 0x402F;
				4398	if ( 0 < shiftCount ) {
				4399	if ( 0x403E <= aExp ) {
				4400	aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
				4401	if ( ( a.high == LIT64( 0xC03E000000000000 ) )
				4402	&& ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
				4403	if ( aSig1 ) STATUS(float_exception_flags) \|= float_flag_inexact;
				4404	}
				4405	else {
				4406	float_raise( float_flag_invalid STATUS_VAR);
				4407	if ( ! aSign \|\| ( ( aExp == 0x7FFF ) && ( aSig0 \| aSig1 ) ) ) {
				4408	return LIT64( 0x7FFFFFFFFFFFFFFF );
				4409	}
				4410	}
				4411	return (sbits64) LIT64( 0x8000000000000000 );
				4412	}
				4413	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & 63 ) );
				4414	if ( (bits64) ( aSig1<<shiftCount ) ) {
				4415	STATUS(float_exception_flags) \|= float_flag_inexact;
				4416	}
				4417	}
				4418	else {
				4419	if ( aExp < 0x3FFF ) {
				4420	if ( aExp \| aSig0 \| aSig1 ) {
				4421	STATUS(float_exception_flags) \|= float_flag_inexact;
				4422	}
				4423	return 0;
				4424	}
				4425	z = aSig0>>( - shiftCount );
				4426	if ( aSig1
				4427	\|\| ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
				4428	STATUS(float_exception_flags) \|= float_flag_inexact;
				4429	}
				4430	}
				4431	if ( aSign ) z = - z;
				4432	return z;
				4433
				4434	}
				4435
				4436	/*----------------------------------------------------------------------------
				4437	\| Returns the result of converting the quadruple-precision floating-point
				4438	\| value `a' to the single-precision floating-point format. The conversion
				4439	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4440	\| Arithmetic.
				4441	----------------------------------------------------------------------------/
				4442
				4443	float32 float128_to_float32( float128 a STATUS_PARAM )
				4444	{
				4445	flag aSign;
				4446	int32 aExp;
				4447	bits64 aSig0, aSig1;
				4448	bits32 zSig;
				4449
				4450	aSig1 = extractFloat128Frac1( a );
				4451	aSig0 = extractFloat128Frac0( a );
				4452	aExp = extractFloat128Exp( a );
				4453	aSign = extractFloat128Sign( a );
				4454	if ( aExp == 0x7FFF ) {
				4455	if ( aSig0 \| aSig1 ) {
				4456	return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) );
				4457	}
				4458	return packFloat32( aSign, 0xFF, 0 );
				4459	}
				4460	aSig0 \|= ( aSig1 != 0 );
				4461	shift64RightJamming( aSig0, 18, &aSig0 );
				4462	zSig = aSig0;
				4463	if ( aExp \|\| zSig ) {
				4464	zSig \|= 0x40000000;
				4465	aExp -= 0x3F81;
				4466	}
				4467	return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
				4468
				4469	}
				4470
				4471	/*----------------------------------------------------------------------------
				4472	\| Returns the result of converting the quadruple-precision floating-point
				4473	\| value `a' to the double-precision floating-point format. The conversion
				4474	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				4475	\| Arithmetic.
				4476	----------------------------------------------------------------------------/
				4477
				4478	float64 float128_to_float64( float128 a STATUS_PARAM )
				4479	{
				4480	flag aSign;
				4481	int32 aExp;
				4482	bits64 aSig0, aSig1;
				4483
				4484	aSig1 = extractFloat128Frac1( a );
				4485	aSig0 = extractFloat128Frac0( a );
				4486	aExp = extractFloat128Exp( a );
				4487	aSign = extractFloat128Sign( a );
				4488	if ( aExp == 0x7FFF ) {
				4489	if ( aSig0 \| aSig1 ) {
				4490	return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) );
				4491	}
				4492	return packFloat64( aSign, 0x7FF, 0 );
				4493	}
				4494	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4495	aSig0 \|= ( aSig1 != 0 );
				4496	if ( aExp \|\| aSig0 ) {
				4497	aSig0 \|= LIT64( 0x4000000000000000 );
				4498	aExp -= 0x3C01;
				4499	}
				4500	return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
				4501
				4502	}
				4503
				4504	#ifdef FLOATX80
				4505
				4506	/*----------------------------------------------------------------------------
				4507	\| Returns the result of converting the quadruple-precision floating-point
				4508	\| value `a' to the extended double-precision floating-point format. The
				4509	\| conversion is performed according to the IEC/IEEE Standard for Binary
				4510	\| Floating-Point Arithmetic.
				4511	----------------------------------------------------------------------------/
				4512
				4513	floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
				4514	{
				4515	flag aSign;
				4516	int32 aExp;
				4517	bits64 aSig0, aSig1;
				4518
				4519	aSig1 = extractFloat128Frac1( a );
				4520	aSig0 = extractFloat128Frac0( a );
				4521	aExp = extractFloat128Exp( a );
				4522	aSign = extractFloat128Sign( a );
				4523	if ( aExp == 0x7FFF ) {
				4524	if ( aSig0 \| aSig1 ) {
				4525	return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) );
				4526	}
				4527	return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
				4528	}
				4529	if ( aExp == 0 ) {
				4530	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
				4531	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4532	}
				4533	else {
				4534	aSig0 \|= LIT64( 0x0001000000000000 );
				4535	}
				4536	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
				4537	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
				4538
				4539	}
				4540
				4541	#endif
				4542
				4543	/*----------------------------------------------------------------------------
				4544	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
				4545	\| returns the result as a quadruple-precision floating-point value. The
				4546	\| operation is performed according to the IEC/IEEE Standard for Binary
				4547	\| Floating-Point Arithmetic.
				4548	----------------------------------------------------------------------------/
				4549
				4550	float128 float128_round_to_int( float128 a STATUS_PARAM )
				4551	{
				4552	flag aSign;
				4553	int32 aExp;
				4554	bits64 lastBitMask, roundBitsMask;
				4555	int8 roundingMode;
				4556	float128 z;
				4557
				4558	aExp = extractFloat128Exp( a );
				4559	if ( 0x402F <= aExp ) {
				4560	if ( 0x406F <= aExp ) {
				4561	if ( ( aExp == 0x7FFF )
				4562	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
				4563	) {
				4564	return propagateFloat128NaN( a, a STATUS_VAR );
				4565	}
				4566	return a;
				4567	}
				4568	lastBitMask = 1;
				4569	lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
				4570	roundBitsMask = lastBitMask - 1;
				4571	z = a;
				4572	roundingMode = STATUS(float_rounding_mode);
				4573	if ( roundingMode == float_round_nearest_even ) {
				4574	if ( lastBitMask ) {
				4575	add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
				4576	if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
				4577	}
				4578	else {
				4579	if ( (sbits64) z.low < 0 ) {
				4580	++z.high;
				4581	if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
				4582	}
				4583	}
				4584	}
				4585	else if ( roundingMode != float_round_to_zero ) {
				4586	if ( extractFloat128Sign( z )
				4587	^ ( roundingMode == float_round_up ) ) {
				4588	add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
				4589	}
				4590	}
				4591	z.low &= ~ roundBitsMask;
				4592	}
				4593	else {
				4594	if ( aExp < 0x3FFF ) {
				4595	if ( ( ( (bits64) ( a.high<<1 ) ) \| a.low ) == 0 ) return a;
				4596	STATUS(float_exception_flags) \|= float_flag_inexact;
				4597	aSign = extractFloat128Sign( a );
				4598	switch ( STATUS(float_rounding_mode) ) {
				4599	case float_round_nearest_even:
				4600	if ( ( aExp == 0x3FFE )
				4601	&& ( extractFloat128Frac0( a )
				4602	\| extractFloat128Frac1( a ) )
				4603	) {
				4604	return packFloat128( aSign, 0x3FFF, 0, 0 );
				4605	}
				4606	break;
				4607	case float_round_down:
				4608	return
				4609	aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
				4610	: packFloat128( 0, 0, 0, 0 );
				4611	case float_round_up:
				4612	return
				4613	aSign ? packFloat128( 1, 0, 0, 0 )
				4614	: packFloat128( 0, 0x3FFF, 0, 0 );
				4615	}
				4616	return packFloat128( aSign, 0, 0, 0 );
				4617	}
				4618	lastBitMask = 1;
				4619	lastBitMask <<= 0x402F - aExp;
				4620	roundBitsMask = lastBitMask - 1;
				4621	z.low = 0;
				4622	z.high = a.high;
				4623	roundingMode = STATUS(float_rounding_mode);
				4624	if ( roundingMode == float_round_nearest_even ) {
				4625	z.high += lastBitMask>>1;
				4626	if ( ( ( z.high & roundBitsMask ) \| a.low ) == 0 ) {
				4627	z.high &= ~ lastBitMask;
				4628	}
				4629	}
				4630	else if ( roundingMode != float_round_to_zero ) {
				4631	if ( extractFloat128Sign( z )
				4632	^ ( roundingMode == float_round_up ) ) {
				4633	z.high \|= ( a.low != 0 );
				4634	z.high += roundBitsMask;
				4635	}
				4636	}
				4637	z.high &= ~ roundBitsMask;
				4638	}
				4639	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
				4640	STATUS(float_exception_flags) \|= float_flag_inexact;
				4641	}
				4642	return z;
				4643
				4644	}
				4645
				4646	/*----------------------------------------------------------------------------
				4647	\| Returns the result of adding the absolute values of the quadruple-precision
				4648	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
				4649	\| before being returned. `zSign' is ignored if the result is a NaN.
				4650	\| The addition is performed according to the IEC/IEEE Standard for Binary
				4651	\| Floating-Point Arithmetic.
				4652	----------------------------------------------------------------------------/
				4653
				4654	static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4655	{
				4656	int32 aExp, bExp, zExp;
				4657	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4658	int32 expDiff;
				4659
				4660	aSig1 = extractFloat128Frac1( a );
				4661	aSig0 = extractFloat128Frac0( a );
				4662	aExp = extractFloat128Exp( a );
				4663	bSig1 = extractFloat128Frac1( b );
				4664	bSig0 = extractFloat128Frac0( b );
				4665	bExp = extractFloat128Exp( b );
				4666	expDiff = aExp - bExp;
				4667	if ( 0 < expDiff ) {
				4668	if ( aExp == 0x7FFF ) {
				4669	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4670	return a;
				4671	}
				4672	if ( bExp == 0 ) {
				4673	--expDiff;
				4674	}
				4675	else {
				4676	bSig0 \|= LIT64( 0x0001000000000000 );
				4677	}
				4678	shift128ExtraRightJamming(
				4679	bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
				4680	zExp = aExp;
				4681	}
				4682	else if ( expDiff < 0 ) {
				4683	if ( bExp == 0x7FFF ) {
				4684	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4685	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4686	}
				4687	if ( aExp == 0 ) {
				4688	++expDiff;
				4689	}
				4690	else {
				4691	aSig0 \|= LIT64( 0x0001000000000000 );
				4692	}
				4693	shift128ExtraRightJamming(
				4694	aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
				4695	zExp = bExp;
				4696	}
				4697	else {
				4698	if ( aExp == 0x7FFF ) {
				4699	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4700	return propagateFloat128NaN( a, b STATUS_VAR );
				4701	}
				4702	return a;
				4703	}
				4704	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrook	fe76d97	2008-12-19 14:33:59 +0000	[diff] [blame]	4705	if ( aExp == 0 ) {
				4706	if ( STATUS(flush_to_zero) ) return packFloat128( zSign, 0, 0, 0 );
				4707	return packFloat128( zSign, 0, zSig0, zSig1 );
				4708	}
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	4709	zSig2 = 0;
				4710	zSig0 \|= LIT64( 0x0002000000000000 );
				4711	zExp = aExp;
				4712	goto shiftRight1;
				4713	}
				4714	aSig0 \|= LIT64( 0x0001000000000000 );
				4715	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4716	--zExp;
				4717	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
				4718	++zExp;
				4719	shiftRight1:
				4720	shift128ExtraRightJamming(
				4721	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4722	roundAndPack:
				4723	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4724
				4725	}
				4726
				4727	/*----------------------------------------------------------------------------
				4728	\| Returns the result of subtracting the absolute values of the quadruple-
				4729	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
				4730	\| difference is negated before being returned. `zSign' is ignored if the
				4731	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
				4732	\| Standard for Binary Floating-Point Arithmetic.
				4733	----------------------------------------------------------------------------/
				4734
				4735	static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
				4736	{
				4737	int32 aExp, bExp, zExp;
				4738	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
				4739	int32 expDiff;
				4740	float128 z;
				4741
				4742	aSig1 = extractFloat128Frac1( a );
				4743	aSig0 = extractFloat128Frac0( a );
				4744	aExp = extractFloat128Exp( a );
				4745	bSig1 = extractFloat128Frac1( b );
				4746	bSig0 = extractFloat128Frac0( b );
				4747	bExp = extractFloat128Exp( b );
				4748	expDiff = aExp - bExp;
				4749	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
				4750	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
				4751	if ( 0 < expDiff ) goto aExpBigger;
				4752	if ( expDiff < 0 ) goto bExpBigger;
				4753	if ( aExp == 0x7FFF ) {
				4754	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
				4755	return propagateFloat128NaN( a, b STATUS_VAR );
				4756	}
				4757	float_raise( float_flag_invalid STATUS_VAR);
				4758	z.low = float128_default_nan_low;
				4759	z.high = float128_default_nan_high;
				4760	return z;
				4761	}
				4762	if ( aExp == 0 ) {
				4763	aExp = 1;
				4764	bExp = 1;
				4765	}
				4766	if ( bSig0 < aSig0 ) goto aBigger;
				4767	if ( aSig0 < bSig0 ) goto bBigger;
				4768	if ( bSig1 < aSig1 ) goto aBigger;
				4769	if ( aSig1 < bSig1 ) goto bBigger;
				4770	return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
				4771	bExpBigger:
				4772	if ( bExp == 0x7FFF ) {
				4773	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4774	return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
				4775	}
				4776	if ( aExp == 0 ) {
				4777	++expDiff;
				4778	}
				4779	else {
				4780	aSig0 \|= LIT64( 0x4000000000000000 );
				4781	}
				4782	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				4783	bSig0 \|= LIT64( 0x4000000000000000 );
				4784	bBigger:
				4785	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4786	zExp = bExp;
				4787	zSign ^= 1;
				4788	goto normalizeRoundAndPack;
				4789	aExpBigger:
				4790	if ( aExp == 0x7FFF ) {
				4791	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4792	return a;
				4793	}
				4794	if ( bExp == 0 ) {
				4795	--expDiff;
				4796	}
				4797	else {
				4798	bSig0 \|= LIT64( 0x4000000000000000 );
				4799	}
				4800	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
				4801	aSig0 \|= LIT64( 0x4000000000000000 );
				4802	aBigger:
				4803	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
				4804	zExp = aExp;
				4805	normalizeRoundAndPack:
				4806	--zExp;
				4807	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
				4808
				4809	}
				4810
				4811	/*----------------------------------------------------------------------------
				4812	\| Returns the result of adding the quadruple-precision floating-point values
				4813	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
				4814	\| for Binary Floating-Point Arithmetic.
				4815	----------------------------------------------------------------------------/
				4816
				4817	float128 float128_add( float128 a, float128 b STATUS_PARAM )
				4818	{
				4819	flag aSign, bSign;
				4820
				4821	aSign = extractFloat128Sign( a );
				4822	bSign = extractFloat128Sign( b );
				4823	if ( aSign == bSign ) {
				4824	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4825	}
				4826	else {
				4827	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4828	}
				4829
				4830	}
				4831
				4832	/*----------------------------------------------------------------------------
				4833	\| Returns the result of subtracting the quadruple-precision floating-point
				4834	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4835	\| Standard for Binary Floating-Point Arithmetic.
				4836	----------------------------------------------------------------------------/
				4837
				4838	float128 float128_sub( float128 a, float128 b STATUS_PARAM )
				4839	{
				4840	flag aSign, bSign;
				4841
				4842	aSign = extractFloat128Sign( a );
				4843	bSign = extractFloat128Sign( b );
				4844	if ( aSign == bSign ) {
				4845	return subFloat128Sigs( a, b, aSign STATUS_VAR );
				4846	}
				4847	else {
				4848	return addFloat128Sigs( a, b, aSign STATUS_VAR );
				4849	}
				4850
				4851	}
				4852
				4853	/*----------------------------------------------------------------------------
				4854	\| Returns the result of multiplying the quadruple-precision floating-point
				4855	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
				4856	\| Standard for Binary Floating-Point Arithmetic.
				4857	----------------------------------------------------------------------------/
				4858
				4859	float128 float128_mul( float128 a, float128 b STATUS_PARAM )
				4860	{
				4861	flag aSign, bSign, zSign;
				4862	int32 aExp, bExp, zExp;
				4863	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
				4864	float128 z;
				4865
				4866	aSig1 = extractFloat128Frac1( a );
				4867	aSig0 = extractFloat128Frac0( a );
				4868	aExp = extractFloat128Exp( a );
				4869	aSign = extractFloat128Sign( a );
				4870	bSig1 = extractFloat128Frac1( b );
				4871	bSig0 = extractFloat128Frac0( b );
				4872	bExp = extractFloat128Exp( b );
				4873	bSign = extractFloat128Sign( b );
				4874	zSign = aSign ^ bSign;
				4875	if ( aExp == 0x7FFF ) {
				4876	if ( ( aSig0 \| aSig1 )
				4877	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				4878	return propagateFloat128NaN( a, b STATUS_VAR );
				4879	}
				4880	if ( ( bExp \| bSig0 \| bSig1 ) == 0 ) goto invalid;
				4881	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4882	}
				4883	if ( bExp == 0x7FFF ) {
				4884	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4885	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4886	invalid:
				4887	float_raise( float_flag_invalid STATUS_VAR);
				4888	z.low = float128_default_nan_low;
				4889	z.high = float128_default_nan_high;
				4890	return z;
				4891	}
				4892	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4893	}
				4894	if ( aExp == 0 ) {
				4895	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4896	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4897	}
				4898	if ( bExp == 0 ) {
				4899	if ( ( bSig0 \| bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4900	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4901	}
				4902	zExp = aExp + bExp - 0x4000;
				4903	aSig0 \|= LIT64( 0x0001000000000000 );
				4904	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
				4905	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
				4906	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
				4907	zSig2 \|= ( zSig3 != 0 );
				4908	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
				4909	shift128ExtraRightJamming(
				4910	zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
				4911	++zExp;
				4912	}
				4913	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4914
				4915	}
				4916
				4917	/*----------------------------------------------------------------------------
				4918	\| Returns the result of dividing the quadruple-precision floating-point value
				4919	\| `a' by the corresponding value `b'. The operation is performed according to
				4920	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				4921	----------------------------------------------------------------------------/
				4922
				4923	float128 float128_div( float128 a, float128 b STATUS_PARAM )
				4924	{
				4925	flag aSign, bSign, zSign;
				4926	int32 aExp, bExp, zExp;
				4927	bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
				4928	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				4929	float128 z;
				4930
				4931	aSig1 = extractFloat128Frac1( a );
				4932	aSig0 = extractFloat128Frac0( a );
				4933	aExp = extractFloat128Exp( a );
				4934	aSign = extractFloat128Sign( a );
				4935	bSig1 = extractFloat128Frac1( b );
				4936	bSig0 = extractFloat128Frac0( b );
				4937	bExp = extractFloat128Exp( b );
				4938	bSign = extractFloat128Sign( b );
				4939	zSign = aSign ^ bSign;
				4940	if ( aExp == 0x7FFF ) {
				4941	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4942	if ( bExp == 0x7FFF ) {
				4943	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4944	goto invalid;
				4945	}
				4946	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4947	}
				4948	if ( bExp == 0x7FFF ) {
				4949	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				4950	return packFloat128( zSign, 0, 0, 0 );
				4951	}
				4952	if ( bExp == 0 ) {
				4953	if ( ( bSig0 \| bSig1 ) == 0 ) {
				4954	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) {
				4955	invalid:
				4956	float_raise( float_flag_invalid STATUS_VAR);
				4957	z.low = float128_default_nan_low;
				4958	z.high = float128_default_nan_high;
				4959	return z;
				4960	}
				4961	float_raise( float_flag_divbyzero STATUS_VAR);
				4962	return packFloat128( zSign, 0x7FFF, 0, 0 );
				4963	}
				4964	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				4965	}
				4966	if ( aExp == 0 ) {
				4967	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
				4968	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				4969	}
				4970	zExp = aExp - bExp + 0x3FFD;
				4971	shortShift128Left(
				4972	aSig0 \| LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
				4973	shortShift128Left(
				4974	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				4975	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
				4976	shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
				4977	++zExp;
				4978	}
				4979	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
				4980	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
				4981	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
				4982	while ( (sbits64) rem0 < 0 ) {
				4983	--zSig0;
				4984	add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
				4985	}
				4986	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
				4987	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
				4988	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
				4989	sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
				4990	while ( (sbits64) rem1 < 0 ) {
				4991	--zSig1;
				4992	add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
				4993	}
				4994	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				4995	}
				4996	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
				4997	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				4998
				4999	}
				5000
				5001	/*----------------------------------------------------------------------------
				5002	\| Returns the remainder of the quadruple-precision floating-point value `a'
				5003	\| with respect to the corresponding value `b'. The operation is performed
				5004	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5005	----------------------------------------------------------------------------/
				5006
				5007	float128 float128_rem( float128 a, float128 b STATUS_PARAM )
				5008	{
				5009	flag aSign, bSign, zSign;
				5010	int32 aExp, bExp, expDiff;
				5011	bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
				5012	bits64 allZero, alternateASig0, alternateASig1, sigMean1;
				5013	sbits64 sigMean0;
				5014	float128 z;
				5015
				5016	aSig1 = extractFloat128Frac1( a );
				5017	aSig0 = extractFloat128Frac0( a );
				5018	aExp = extractFloat128Exp( a );
				5019	aSign = extractFloat128Sign( a );
				5020	bSig1 = extractFloat128Frac1( b );
				5021	bSig0 = extractFloat128Frac0( b );
				5022	bExp = extractFloat128Exp( b );
				5023	bSign = extractFloat128Sign( b );
				5024	if ( aExp == 0x7FFF ) {
				5025	if ( ( aSig0 \| aSig1 )
				5026	\|\| ( ( bExp == 0x7FFF ) && ( bSig0 \| bSig1 ) ) ) {
				5027	return propagateFloat128NaN( a, b STATUS_VAR );
				5028	}
				5029	goto invalid;
				5030	}
				5031	if ( bExp == 0x7FFF ) {
				5032	if ( bSig0 \| bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
				5033	return a;
				5034	}
				5035	if ( bExp == 0 ) {
				5036	if ( ( bSig0 \| bSig1 ) == 0 ) {
				5037	invalid:
				5038	float_raise( float_flag_invalid STATUS_VAR);
				5039	z.low = float128_default_nan_low;
				5040	z.high = float128_default_nan_high;
				5041	return z;
				5042	}
				5043	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
				5044	}
				5045	if ( aExp == 0 ) {
				5046	if ( ( aSig0 \| aSig1 ) == 0 ) return a;
				5047	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5048	}
				5049	expDiff = aExp - bExp;
				5050	if ( expDiff < -1 ) return a;
				5051	shortShift128Left(
				5052	aSig0 \| LIT64( 0x0001000000000000 ),
				5053	aSig1,
				5054	15 - ( expDiff < 0 ),
				5055	&aSig0,
				5056	&aSig1
				5057	);
				5058	shortShift128Left(
				5059	bSig0 \| LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
				5060	q = le128( bSig0, bSig1, aSig0, aSig1 );
				5061	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				5062	expDiff -= 64;
				5063	while ( 0 < expDiff ) {
				5064	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				5065	q = ( 4 < q ) ? q - 4 : 0;
				5066	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				5067	shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
				5068	shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
				5069	sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
				5070	expDiff -= 61;
				5071	}
				5072	if ( -64 < expDiff ) {
				5073	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
				5074	q = ( 4 < q ) ? q - 4 : 0;
				5075	q >>= - expDiff;
				5076	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				5077	expDiff += 52;
				5078	if ( expDiff < 0 ) {
				5079	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
				5080	}
				5081	else {
				5082	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
				5083	}
				5084	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
				5085	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
				5086	}
				5087	else {
				5088	shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
				5089	shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
				5090	}
				5091	do {
				5092	alternateASig0 = aSig0;
				5093	alternateASig1 = aSig1;
				5094	++q;
				5095	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
				5096	} while ( 0 <= (sbits64) aSig0 );
				5097	add128(
blueswir1	b55266b	2008-09-20 08:07:15 +0000	[diff] [blame]	5098	aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5099	if ( ( sigMean0 < 0 )
				5100	\|\| ( ( ( sigMean0 \| sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
				5101	aSig0 = alternateASig0;
				5102	aSig1 = alternateASig1;
				5103	}
				5104	zSign = ( (sbits64) aSig0 < 0 );
				5105	if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
				5106	return
				5107	normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
				5108
				5109	}
				5110
				5111	/*----------------------------------------------------------------------------
				5112	\| Returns the square root of the quadruple-precision floating-point value `a'.
				5113	\| The operation is performed according to the IEC/IEEE Standard for Binary
				5114	\| Floating-Point Arithmetic.
				5115	----------------------------------------------------------------------------/
				5116
				5117	float128 float128_sqrt( float128 a STATUS_PARAM )
				5118	{
				5119	flag aSign;
				5120	int32 aExp, zExp;
				5121	bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
				5122	bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
				5123	float128 z;
				5124
				5125	aSig1 = extractFloat128Frac1( a );
				5126	aSig0 = extractFloat128Frac0( a );
				5127	aExp = extractFloat128Exp( a );
				5128	aSign = extractFloat128Sign( a );
				5129	if ( aExp == 0x7FFF ) {
				5130	if ( aSig0 \| aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
				5131	if ( ! aSign ) return a;
				5132	goto invalid;
				5133	}
				5134	if ( aSign ) {
				5135	if ( ( aExp \| aSig0 \| aSig1 ) == 0 ) return a;
				5136	invalid:
				5137	float_raise( float_flag_invalid STATUS_VAR);
				5138	z.low = float128_default_nan_low;
				5139	z.high = float128_default_nan_high;
				5140	return z;
				5141	}
				5142	if ( aExp == 0 ) {
				5143	if ( ( aSig0 \| aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
				5144	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
				5145	}
				5146	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
				5147	aSig0 \|= LIT64( 0x0001000000000000 );
				5148	zSig0 = estimateSqrt32( aExp, aSig0>>17 );
				5149	shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
				5150	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
				5151	doubleZSig0 = zSig0<<1;
				5152	mul64To128( zSig0, zSig0, &term0, &term1 );
				5153	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
				5154	while ( (sbits64) rem0 < 0 ) {
				5155	--zSig0;
				5156	doubleZSig0 -= 2;
				5157	add128( rem0, rem1, zSig0>>63, doubleZSig0 \| 1, &rem0, &rem1 );
				5158	}
				5159	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
				5160	if ( ( zSig1 & 0x1FFF ) <= 5 ) {
				5161	if ( zSig1 == 0 ) zSig1 = 1;
				5162	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
				5163	sub128( rem1, 0, term1, term2, &rem1, &rem2 );
				5164	mul64To128( zSig1, zSig1, &term2, &term3 );
				5165	sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
				5166	while ( (sbits64) rem1 < 0 ) {
				5167	--zSig1;
				5168	shortShift128Left( 0, zSig1, 1, &term2, &term3 );
				5169	term3 \|= 1;
				5170	term2 \|= doubleZSig0;
				5171	add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
				5172	}
				5173	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != 0 );
				5174	}
				5175	shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
				5176	return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
				5177
				5178	}
				5179
				5180	/*----------------------------------------------------------------------------
				5181	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5182	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5183	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5184	----------------------------------------------------------------------------/
				5185
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5186	int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5187	{
				5188
				5189	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5190	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5191	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5192	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5193	) {
				5194	if ( float128_is_signaling_nan( a )
				5195	\|\| float128_is_signaling_nan( b ) ) {
				5196	float_raise( float_flag_invalid STATUS_VAR);
				5197	}
				5198	return 0;
				5199	}
				5200	return
				5201	( a.low == b.low )
				5202	&& ( ( a.high == b.high )
				5203	\|\| ( ( a.low == 0 )
				5204	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5205	);
				5206
				5207	}
				5208
				5209	/*----------------------------------------------------------------------------
				5210	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5211	\| or equal to the corresponding value `b', and 0 otherwise. The comparison
				5212	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
				5213	\| Arithmetic.
				5214	----------------------------------------------------------------------------/
				5215
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5216	int float128_le( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5217	{
				5218	flag aSign, bSign;
				5219
				5220	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5221	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5222	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5223	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5224	) {
				5225	float_raise( float_flag_invalid STATUS_VAR);
				5226	return 0;
				5227	}
				5228	aSign = extractFloat128Sign( a );
				5229	bSign = extractFloat128Sign( b );
				5230	if ( aSign != bSign ) {
				5231	return
				5232	aSign
				5233	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5234	== 0 );
				5235	}
				5236	return
				5237	aSign ? le128( b.high, b.low, a.high, a.low )
				5238	: le128( a.high, a.low, b.high, b.low );
				5239
				5240	}
				5241
				5242	/*----------------------------------------------------------------------------
				5243	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5244	\| the corresponding value `b', and 0 otherwise. The comparison is performed
				5245	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5246	----------------------------------------------------------------------------/
				5247
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5248	int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5249	{
				5250	flag aSign, bSign;
				5251
				5252	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5253	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5254	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5255	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5256	) {
				5257	float_raise( float_flag_invalid STATUS_VAR);
				5258	return 0;
				5259	}
				5260	aSign = extractFloat128Sign( a );
				5261	bSign = extractFloat128Sign( b );
				5262	if ( aSign != bSign ) {
				5263	return
				5264	aSign
				5265	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5266	!= 0 );
				5267	}
				5268	return
				5269	aSign ? lt128( b.high, b.low, a.high, a.low )
				5270	: lt128( a.high, a.low, b.high, b.low );
				5271
				5272	}
				5273
				5274	/*----------------------------------------------------------------------------
				5275	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
				5276	\| the corresponding value `b', and 0 otherwise. The invalid exception is
				5277	\| raised if either operand is a NaN. Otherwise, the comparison is performed
				5278	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5279	----------------------------------------------------------------------------/
				5280
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5281	int float128_eq_signaling( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5282	{
				5283
				5284	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5285	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5286	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5287	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5288	) {
				5289	float_raise( float_flag_invalid STATUS_VAR);
				5290	return 0;
				5291	}
				5292	return
				5293	( a.low == b.low )
				5294	&& ( ( a.high == b.high )
				5295	\|\| ( ( a.low == 0 )
				5296	&& ( (bits64) ( ( a.high \| b.high )<<1 ) == 0 ) )
				5297	);
				5298
				5299	}
				5300
				5301	/*----------------------------------------------------------------------------
				5302	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5303	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
				5304	\| cause an exception. Otherwise, the comparison is performed according to the
				5305	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
				5306	----------------------------------------------------------------------------/
				5307
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5308	int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5309	{
				5310	flag aSign, bSign;
				5311
				5312	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5313	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5314	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5315	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5316	) {
				5317	if ( float128_is_signaling_nan( a )
				5318	\|\| float128_is_signaling_nan( b ) ) {
				5319	float_raise( float_flag_invalid STATUS_VAR);
				5320	}
				5321	return 0;
				5322	}
				5323	aSign = extractFloat128Sign( a );
				5324	bSign = extractFloat128Sign( b );
				5325	if ( aSign != bSign ) {
				5326	return
				5327	aSign
				5328	\|\| ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5329	== 0 );
				5330	}
				5331	return
				5332	aSign ? le128( b.high, b.low, a.high, a.low )
				5333	: le128( a.high, a.low, b.high, b.low );
				5334
				5335	}
				5336
				5337	/*----------------------------------------------------------------------------
				5338	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
				5339	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
				5340	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
				5341	\| Standard for Binary Floating-Point Arithmetic.
				5342	----------------------------------------------------------------------------/
				5343
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5344	int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard	158142c	2005-03-13 16:54:06 +0000	[diff] [blame]	5345	{
				5346	flag aSign, bSign;
				5347
				5348	if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
				5349	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
				5350	\|\| ( ( extractFloat128Exp( b ) == 0x7FFF )
				5351	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
				5352	) {
				5353	if ( float128_is_signaling_nan( a )
				5354	\|\| float128_is_signaling_nan( b ) ) {
				5355	float_raise( float_flag_invalid STATUS_VAR);
				5356	}
				5357	return 0;
				5358	}
				5359	aSign = extractFloat128Sign( a );
				5360	bSign = extractFloat128Sign( b );
				5361	if ( aSign != bSign ) {
				5362	return
				5363	aSign
				5364	&& ( ( ( (bits64) ( ( a.high \| b.high )<<1 ) ) \| a.low \| b.low )
				5365	!= 0 );
				5366	}
				5367	return
				5368	aSign ? lt128( b.high, b.low, a.high, a.low )
				5369	: lt128( a.high, a.low, b.high, b.low );
				5370
				5371	}
				5372
				5373	#endif
				5374
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5375	/* misc functions */
				5376	float32 uint32_to_float32( unsigned int a STATUS_PARAM )
				5377	{
				5378	return int64_to_float32(a STATUS_VAR);
				5379	}
				5380
				5381	float64 uint32_to_float64( unsigned int a STATUS_PARAM )
				5382	{
				5383	return int64_to_float64(a STATUS_VAR);
				5384	}
				5385
				5386	unsigned int float32_to_uint32( float32 a STATUS_PARAM )
				5387	{
				5388	int64_t v;
				5389	unsigned int res;
				5390
				5391	v = float32_to_int64(a STATUS_VAR);
				5392	if (v < 0) {
				5393	res = 0;
				5394	float_raise( float_flag_invalid STATUS_VAR);
				5395	} else if (v > 0xffffffff) {
				5396	res = 0xffffffff;
				5397	float_raise( float_flag_invalid STATUS_VAR);
				5398	} else {
				5399	res = v;
				5400	}
				5401	return res;
				5402	}
				5403
				5404	unsigned int float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
				5405	{
				5406	int64_t v;
				5407	unsigned int res;
				5408
				5409	v = float32_to_int64_round_to_zero(a STATUS_VAR);
				5410	if (v < 0) {
				5411	res = 0;
				5412	float_raise( float_flag_invalid STATUS_VAR);
				5413	} else if (v > 0xffffffff) {
				5414	res = 0xffffffff;
				5415	float_raise( float_flag_invalid STATUS_VAR);
				5416	} else {
				5417	res = v;
				5418	}
				5419	return res;
				5420	}
				5421
				5422	unsigned int float64_to_uint32( float64 a STATUS_PARAM )
				5423	{
				5424	int64_t v;
				5425	unsigned int res;
				5426
				5427	v = float64_to_int64(a STATUS_VAR);
				5428	if (v < 0) {
				5429	res = 0;
				5430	float_raise( float_flag_invalid STATUS_VAR);
				5431	} else if (v > 0xffffffff) {
				5432	res = 0xffffffff;
				5433	float_raise( float_flag_invalid STATUS_VAR);
				5434	} else {
				5435	res = v;
				5436	}
				5437	return res;
				5438	}
				5439
				5440	unsigned int float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
				5441	{
				5442	int64_t v;
				5443	unsigned int res;
				5444
				5445	v = float64_to_int64_round_to_zero(a STATUS_VAR);
				5446	if (v < 0) {
				5447	res = 0;
				5448	float_raise( float_flag_invalid STATUS_VAR);
				5449	} else if (v > 0xffffffff) {
				5450	res = 0xffffffff;
				5451	float_raise( float_flag_invalid STATUS_VAR);
				5452	} else {
				5453	res = v;
				5454	}
				5455	return res;
				5456	}
				5457
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5458	/* FIXME: This looks broken. */
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5459	uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
				5460	{
				5461	int64_t v;
				5462
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5463	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				5464	v += float64_val(a);
				5465	v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5466
				5467	return v - INT64_MIN;
				5468	}
				5469
				5470	uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
				5471	{
				5472	int64_t v;
				5473
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5474	v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
				5475	v += float64_val(a);
				5476	v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer	75d62a5	2007-03-20 22:10:42 +0000	[diff] [blame]	5477
				5478	return v - INT64_MIN;
				5479	}
				5480
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5481	#define COMPARE(s, nan_exp) \
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5482	INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5483	int is_quiet STATUS_PARAM ) \
				5484	{ \
				5485	flag aSign, bSign; \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5486	bits ## s av, bv; \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5487	\
				5488	if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
				5489	extractFloat ## s ## Frac( a ) ) \|\| \
				5490	( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
				5491	extractFloat ## s ## Frac( b ) )) { \
				5492	if (!is_quiet \|\| \
				5493	float ## s ## _is_signaling_nan( a ) \|\| \
				5494	float ## s ## _is_signaling_nan( b ) ) { \
				5495	float_raise( float_flag_invalid STATUS_VAR); \
				5496	} \
				5497	return float_relation_unordered; \
				5498	} \
				5499	aSign = extractFloat ## s ## Sign( a ); \
				5500	bSign = extractFloat ## s ## Sign( b ); \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5501	av = float ## s ## _val(a); \
blueswir1	cd8a253	2007-11-21 18:57:44 +0000	[diff] [blame]	5502	bv = float ## s ## _val(b); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5503	if ( aSign != bSign ) { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5504	if ( (bits ## s) ( ( av \| bv )<<1 ) == 0 ) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5505	/* zero case */ \
				5506	return float_relation_equal; \
				5507	} else { \
				5508	return 1 - (2 * aSign); \
				5509	} \
				5510	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5511	if (av == bv) { \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5512	return float_relation_equal; \
				5513	} else { \
pbrook	f090c9d	2007-11-18 14:33:24 +0000	[diff] [blame]	5514	return 1 - 2 * (aSign ^ ( av < bv )); \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5515	} \
				5516	} \
				5517	} \
				5518	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5519	int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5520	{ \
				5521	return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
				5522	} \
				5523	\
bellard	750afe9	2006-10-28 19:27:11 +0000	[diff] [blame]	5524	int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard	1d6bda3	2005-03-13 18:52:29 +0000	[diff] [blame]	5525	{ \
				5526	return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
				5527	}
				5528
				5529	COMPARE(32, 0xff)
				5530	COMPARE(64, 0x7ff)
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5531
blueswir1	1f58732	2007-11-25 18:40:20 +0000	[diff] [blame]	5532	INLINE int float128_compare_internal( float128 a, float128 b,
				5533	int is_quiet STATUS_PARAM )
				5534	{
				5535	flag aSign, bSign;
				5536
				5537	if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
				5538	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
				5539	( ( extractFloat128Exp( b ) == 0x7fff ) &&
				5540	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
				5541	if (!is_quiet \|\|
				5542	float128_is_signaling_nan( a ) \|\|
				5543	float128_is_signaling_nan( b ) ) {
				5544	float_raise( float_flag_invalid STATUS_VAR);
				5545	}
				5546	return float_relation_unordered;
				5547	}
				5548	aSign = extractFloat128Sign( a );
				5549	bSign = extractFloat128Sign( b );
				5550	if ( aSign != bSign ) {
				5551	if ( ( ( ( a.high \| b.high )<<1 ) \| a.low \| b.low ) == 0 ) {
				5552	/* zero case */
				5553	return float_relation_equal;
				5554	} else {
				5555	return 1 - (2 * aSign);
				5556	}
				5557	} else {
				5558	if (a.low == b.low && a.high == b.high) {
				5559	return float_relation_equal;
				5560	} else {
				5561	return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
				5562	}
				5563	}
				5564	}
				5565
				5566	int float128_compare( float128 a, float128 b STATUS_PARAM )
				5567	{
				5568	return float128_compare_internal(a, b, 0 STATUS_VAR);
				5569	}
				5570
				5571	int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
				5572	{
				5573	return float128_compare_internal(a, b, 1 STATUS_VAR);
				5574	}
				5575
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5576	/* Multiply A by 2 raised to the power N. */
				5577	float32 float32_scalbn( float32 a, int n STATUS_PARAM )
				5578	{
				5579	flag aSign;
				5580	int16 aExp;
				5581	bits32 aSig;
				5582
				5583	aSig = extractFloat32Frac( a );
				5584	aExp = extractFloat32Exp( a );
				5585	aSign = extractFloat32Sign( a );
				5586
				5587	if ( aExp == 0xFF ) {
				5588	return a;
				5589	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	5590	if ( aExp != 0 )
				5591	aSig \|= 0x00800000;
				5592	else if ( aSig == 0 )
				5593	return a;
				5594
				5595	aExp += n - 1;
				5596	aSig <<= 7;
				5597	return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5598	}
				5599
				5600	float64 float64_scalbn( float64 a, int n STATUS_PARAM )
				5601	{
				5602	flag aSign;
				5603	int16 aExp;
				5604	bits64 aSig;
				5605
				5606	aSig = extractFloat64Frac( a );
				5607	aExp = extractFloat64Exp( a );
				5608	aSign = extractFloat64Sign( a );
				5609
				5610	if ( aExp == 0x7FF ) {
				5611	return a;
				5612	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	5613	if ( aExp != 0 )
				5614	aSig \|= LIT64( 0x0010000000000000 );
				5615	else if ( aSig == 0 )
				5616	return a;
				5617
				5618	aExp += n - 1;
				5619	aSig <<= 10;
				5620	return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5621	}
				5622
				5623	#ifdef FLOATX80
				5624	floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
				5625	{
				5626	flag aSign;
				5627	int16 aExp;
				5628	bits64 aSig;
				5629
				5630	aSig = extractFloatx80Frac( a );
				5631	aExp = extractFloatx80Exp( a );
				5632	aSign = extractFloatx80Sign( a );
				5633
				5634	if ( aExp == 0x7FF ) {
				5635	return a;
				5636	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	5637	if (aExp == 0 && aSig == 0)
				5638	return a;
				5639
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5640	aExp += n;
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	5641	return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
				5642	aSign, aExp, aSig, 0 STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5643	}
				5644	#endif
				5645
				5646	#ifdef FLOAT128
				5647	float128 float128_scalbn( float128 a, int n STATUS_PARAM )
				5648	{
				5649	flag aSign;
				5650	int32 aExp;
				5651	bits64 aSig0, aSig1;
				5652
				5653	aSig1 = extractFloat128Frac1( a );
				5654	aSig0 = extractFloat128Frac0( a );
				5655	aExp = extractFloat128Exp( a );
				5656	aSign = extractFloat128Sign( a );
				5657	if ( aExp == 0x7FFF ) {
				5658	return a;
				5659	}
pbrook	6939754	2008-12-19 12:59:28 +0000	[diff] [blame]	5660	if ( aExp != 0 )
				5661	aSig0 \|= LIT64( 0x0001000000000000 );
				5662	else if ( aSig0 == 0 && aSig1 == 0 )
				5663	return a;
				5664
				5665	aExp += n - 1;
				5666	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
				5667	STATUS_VAR );
pbrook	9ee6e8b	2007-11-11 00:04:49 +0000	[diff] [blame]	5668
				5669	}
				5670	#endif