blob: 81a7d1ae09ecb2545867d2b4dcab92675ec5ce4d [file] [log] [blame]
Andreas Färber8d725fa2011-03-07 01:34:04 +01001/*
2 * QEMU float support
3 *
4 * Derived from SoftFloat.
5 */
bellard158142c2005-03-13 16:54:06 +00006
7/*============================================================================
8
9This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10Package, Release 2b.
11
12Written by John R. Hauser. This work was made possible in part by the
13International Computer Science Institute, located at Suite 600, 1947 Center
14Street, Berkeley, California 94704. Funding was partially provided by the
15National Science Foundation under grant MIP-9311980. The original version
16of this code was written as part of a project to build a fixed-point vector
17processor in collaboration with the University of California at Berkeley,
18overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20arithmetic/SoftFloat.html'.
21
22THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30
31Derivative works are acceptable, even for commercial purposes, so long as
32(1) the source code for the derivative work includes prominent notice that
33the work is derivative, and (2) the source code includes prominent notice with
34these four paragraphs for those parts of this code that are retained.
35
36=============================================================================*/
37
Peter Maydell2ac8bd02011-09-26 16:56:55 +010038/* softfloat (and in particular the code in softfloat-specialize.h) is
39 * target-dependent and needs the TARGET_* macros.
40 */
41#include "config.h"
42
bellard158142c2005-03-13 16:54:06 +000043#include "softfloat.h"
44
45/*----------------------------------------------------------------------------
46| Primitive arithmetic functions, including multi-word arithmetic, and
47| division and square root approximations. (Can be specialized to target if
48| desired.)
49*----------------------------------------------------------------------------*/
50#include "softfloat-macros.h"
51
52/*----------------------------------------------------------------------------
53| Functions and definitions to determine: (1) whether tininess for underflow
54| is detected before or after rounding by default, (2) what (if anything)
55| happens when exceptions are raised, (3) how signaling NaNs are distinguished
56| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
57| are propagated from function inputs to output. These details are target-
58| specific.
59*----------------------------------------------------------------------------*/
60#include "softfloat-specialize.h"
61
62void set_float_rounding_mode(int val STATUS_PARAM)
63{
64 STATUS(float_rounding_mode) = val;
65}
66
bellard1d6bda32005-03-13 18:52:29 +000067void set_float_exception_flags(int val STATUS_PARAM)
68{
69 STATUS(float_exception_flags) = val;
70}
71
bellard158142c2005-03-13 16:54:06 +000072void set_floatx80_rounding_precision(int val STATUS_PARAM)
73{
74 STATUS(floatx80_rounding_precision) = val;
75}
bellard158142c2005-03-13 16:54:06 +000076
77/*----------------------------------------------------------------------------
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000078| Returns the fraction bits of the half-precision floating-point value `a'.
79*----------------------------------------------------------------------------*/
80
81INLINE uint32_t extractFloat16Frac(float16 a)
82{
83 return float16_val(a) & 0x3ff;
84}
85
86/*----------------------------------------------------------------------------
87| Returns the exponent bits of the half-precision floating-point value `a'.
88*----------------------------------------------------------------------------*/
89
90INLINE int16 extractFloat16Exp(float16 a)
91{
92 return (float16_val(a) >> 10) & 0x1f;
93}
94
95/*----------------------------------------------------------------------------
96| Returns the sign bit of the single-precision floating-point value `a'.
97*----------------------------------------------------------------------------*/
98
99INLINE flag extractFloat16Sign(float16 a)
100{
101 return float16_val(a)>>15;
102}
103
104/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000105| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
106| and 7, and returns the properly rounded 32-bit integer corresponding to the
107| input. If `zSign' is 1, the input is negated before being converted to an
108| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
109| is simply rounded to an integer, with the inexact exception raised if the
110| input cannot be represented exactly as an integer. However, if the fixed-
111| point input is too large, the invalid exception is raised and the largest
112| positive or negative integer is returned.
113*----------------------------------------------------------------------------*/
114
Andreas Färberbb98fe42011-03-07 01:34:06 +0100115static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000116{
117 int8 roundingMode;
118 flag roundNearestEven;
119 int8 roundIncrement, roundBits;
120 int32 z;
121
122 roundingMode = STATUS(float_rounding_mode);
123 roundNearestEven = ( roundingMode == float_round_nearest_even );
124 roundIncrement = 0x40;
125 if ( ! roundNearestEven ) {
126 if ( roundingMode == float_round_to_zero ) {
127 roundIncrement = 0;
128 }
129 else {
130 roundIncrement = 0x7F;
131 if ( zSign ) {
132 if ( roundingMode == float_round_up ) roundIncrement = 0;
133 }
134 else {
135 if ( roundingMode == float_round_down ) roundIncrement = 0;
136 }
137 }
138 }
139 roundBits = absZ & 0x7F;
140 absZ = ( absZ + roundIncrement )>>7;
141 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
142 z = absZ;
143 if ( zSign ) z = - z;
144 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
145 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +0100146 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +0000147 }
148 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
149 return z;
150
151}
152
153/*----------------------------------------------------------------------------
154| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
155| `absZ1', with binary point between bits 63 and 64 (between the input words),
156| and returns the properly rounded 64-bit integer corresponding to the input.
157| If `zSign' is 1, the input is negated before being converted to an integer.
158| Ordinarily, the fixed-point input is simply rounded to an integer, with
159| the inexact exception raised if the input cannot be represented exactly as
160| an integer. However, if the fixed-point input is too large, the invalid
161| exception is raised and the largest positive or negative integer is
162| returned.
163*----------------------------------------------------------------------------*/
164
Andreas Färberbb98fe42011-03-07 01:34:06 +0100165static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000166{
167 int8 roundingMode;
168 flag roundNearestEven, increment;
169 int64 z;
170
171 roundingMode = STATUS(float_rounding_mode);
172 roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100173 increment = ( (int64_t) absZ1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000174 if ( ! roundNearestEven ) {
175 if ( roundingMode == float_round_to_zero ) {
176 increment = 0;
177 }
178 else {
179 if ( zSign ) {
180 increment = ( roundingMode == float_round_down ) && absZ1;
181 }
182 else {
183 increment = ( roundingMode == float_round_up ) && absZ1;
184 }
185 }
186 }
187 if ( increment ) {
188 ++absZ0;
189 if ( absZ0 == 0 ) goto overflow;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100190 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000191 }
192 z = absZ0;
193 if ( zSign ) z = - z;
194 if ( z && ( ( z < 0 ) ^ zSign ) ) {
195 overflow:
196 float_raise( float_flag_invalid STATUS_VAR);
197 return
Andreas Färberbb98fe42011-03-07 01:34:06 +0100198 zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard158142c2005-03-13 16:54:06 +0000199 : LIT64( 0x7FFFFFFFFFFFFFFF );
200 }
201 if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
202 return z;
203
204}
205
206/*----------------------------------------------------------------------------
207| Returns the fraction bits of the single-precision floating-point value `a'.
208*----------------------------------------------------------------------------*/
209
Andreas Färberbb98fe42011-03-07 01:34:06 +0100210INLINE uint32_t extractFloat32Frac( float32 a )
bellard158142c2005-03-13 16:54:06 +0000211{
212
pbrookf090c9d2007-11-18 14:33:24 +0000213 return float32_val(a) & 0x007FFFFF;
bellard158142c2005-03-13 16:54:06 +0000214
215}
216
217/*----------------------------------------------------------------------------
218| Returns the exponent bits of the single-precision floating-point value `a'.
219*----------------------------------------------------------------------------*/
220
221INLINE int16 extractFloat32Exp( float32 a )
222{
223
pbrookf090c9d2007-11-18 14:33:24 +0000224 return ( float32_val(a)>>23 ) & 0xFF;
bellard158142c2005-03-13 16:54:06 +0000225
226}
227
228/*----------------------------------------------------------------------------
229| Returns the sign bit of the single-precision floating-point value `a'.
230*----------------------------------------------------------------------------*/
231
232INLINE flag extractFloat32Sign( float32 a )
233{
234
pbrookf090c9d2007-11-18 14:33:24 +0000235 return float32_val(a)>>31;
bellard158142c2005-03-13 16:54:06 +0000236
237}
238
239/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000240| If `a' is denormal and we are in flush-to-zero mode then set the
241| input-denormal exception and return zero. Otherwise just return the value.
242*----------------------------------------------------------------------------*/
243static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
244{
245 if (STATUS(flush_inputs_to_zero)) {
246 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
247 float_raise(float_flag_input_denormal STATUS_VAR);
248 return make_float32(float32_val(a) & 0x80000000);
249 }
250 }
251 return a;
252}
253
254/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000255| Normalizes the subnormal single-precision floating-point value represented
256| by the denormalized significand `aSig'. The normalized exponent and
257| significand are stored at the locations pointed to by `zExpPtr' and
258| `zSigPtr', respectively.
259*----------------------------------------------------------------------------*/
260
261static void
Andreas Färberbb98fe42011-03-07 01:34:06 +0100262 normalizeFloat32Subnormal( uint32_t aSig, int16 *zExpPtr, uint32_t *zSigPtr )
bellard158142c2005-03-13 16:54:06 +0000263{
264 int8 shiftCount;
265
266 shiftCount = countLeadingZeros32( aSig ) - 8;
267 *zSigPtr = aSig<<shiftCount;
268 *zExpPtr = 1 - shiftCount;
269
270}
271
272/*----------------------------------------------------------------------------
273| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
274| single-precision floating-point value, returning the result. After being
275| shifted into the proper positions, the three fields are simply added
276| together to form the result. This means that any integer portion of `zSig'
277| will be added into the exponent. Since a properly normalized significand
278| will have an integer portion equal to 1, the `zExp' input should be 1 less
279| than the desired result exponent whenever `zSig' is a complete, normalized
280| significand.
281*----------------------------------------------------------------------------*/
282
Andreas Färberbb98fe42011-03-07 01:34:06 +0100283INLINE float32 packFloat32( flag zSign, int16 zExp, uint32_t zSig )
bellard158142c2005-03-13 16:54:06 +0000284{
285
pbrookf090c9d2007-11-18 14:33:24 +0000286 return make_float32(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100287 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000288
289}
290
291/*----------------------------------------------------------------------------
292| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
293| and significand `zSig', and returns the proper single-precision floating-
294| point value corresponding to the abstract input. Ordinarily, the abstract
295| value is simply rounded and packed into the single-precision format, with
296| the inexact exception raised if the abstract input cannot be represented
297| exactly. However, if the abstract value is too large, the overflow and
298| inexact exceptions are raised and an infinity or maximal finite value is
299| returned. If the abstract value is too small, the input value is rounded to
300| a subnormal number, and the underflow and inexact exceptions are raised if
301| the abstract input cannot be represented exactly as a subnormal single-
302| precision floating-point number.
303| The input significand `zSig' has its binary point between bits 30
304| and 29, which is 7 bits to the left of the usual location. This shifted
305| significand must be normalized or smaller. If `zSig' is not normalized,
306| `zExp' must be 0; in that case, the result returned is a subnormal number,
307| and it must not require rounding. In the usual case that `zSig' is
308| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
309| The handling of underflow and overflow follows the IEC/IEEE Standard for
310| Binary Floating-Point Arithmetic.
311*----------------------------------------------------------------------------*/
312
Andreas Färberbb98fe42011-03-07 01:34:06 +0100313static float32 roundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000314{
315 int8 roundingMode;
316 flag roundNearestEven;
317 int8 roundIncrement, roundBits;
318 flag isTiny;
319
320 roundingMode = STATUS(float_rounding_mode);
321 roundNearestEven = ( roundingMode == float_round_nearest_even );
322 roundIncrement = 0x40;
323 if ( ! roundNearestEven ) {
324 if ( roundingMode == float_round_to_zero ) {
325 roundIncrement = 0;
326 }
327 else {
328 roundIncrement = 0x7F;
329 if ( zSign ) {
330 if ( roundingMode == float_round_up ) roundIncrement = 0;
331 }
332 else {
333 if ( roundingMode == float_round_down ) roundIncrement = 0;
334 }
335 }
336 }
337 roundBits = zSig & 0x7F;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100338 if ( 0xFD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000339 if ( ( 0xFD < zExp )
340 || ( ( zExp == 0xFD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100341 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000342 ) {
343 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000344 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000345 }
346 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100347 if (STATUS(flush_to_zero)) {
348 float_raise(float_flag_output_denormal STATUS_VAR);
349 return packFloat32(zSign, 0, 0);
350 }
bellard158142c2005-03-13 16:54:06 +0000351 isTiny =
352 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
353 || ( zExp < -1 )
354 || ( zSig + roundIncrement < 0x80000000 );
355 shift32RightJamming( zSig, - zExp, &zSig );
356 zExp = 0;
357 roundBits = zSig & 0x7F;
358 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
359 }
360 }
361 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
362 zSig = ( zSig + roundIncrement )>>7;
363 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
364 if ( zSig == 0 ) zExp = 0;
365 return packFloat32( zSign, zExp, zSig );
366
367}
368
369/*----------------------------------------------------------------------------
370| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
371| and significand `zSig', and returns the proper single-precision floating-
372| point value corresponding to the abstract input. This routine is just like
373| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
374| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
375| floating-point exponent.
376*----------------------------------------------------------------------------*/
377
378static float32
Andreas Färberbb98fe42011-03-07 01:34:06 +0100379 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000380{
381 int8 shiftCount;
382
383 shiftCount = countLeadingZeros32( zSig ) - 1;
384 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
385
386}
387
388/*----------------------------------------------------------------------------
389| Returns the fraction bits of the double-precision floating-point value `a'.
390*----------------------------------------------------------------------------*/
391
Andreas Färberbb98fe42011-03-07 01:34:06 +0100392INLINE uint64_t extractFloat64Frac( float64 a )
bellard158142c2005-03-13 16:54:06 +0000393{
394
pbrookf090c9d2007-11-18 14:33:24 +0000395 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard158142c2005-03-13 16:54:06 +0000396
397}
398
399/*----------------------------------------------------------------------------
400| Returns the exponent bits of the double-precision floating-point value `a'.
401*----------------------------------------------------------------------------*/
402
403INLINE int16 extractFloat64Exp( float64 a )
404{
405
pbrookf090c9d2007-11-18 14:33:24 +0000406 return ( float64_val(a)>>52 ) & 0x7FF;
bellard158142c2005-03-13 16:54:06 +0000407
408}
409
410/*----------------------------------------------------------------------------
411| Returns the sign bit of the double-precision floating-point value `a'.
412*----------------------------------------------------------------------------*/
413
414INLINE flag extractFloat64Sign( float64 a )
415{
416
pbrookf090c9d2007-11-18 14:33:24 +0000417 return float64_val(a)>>63;
bellard158142c2005-03-13 16:54:06 +0000418
419}
420
421/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000422| If `a' is denormal and we are in flush-to-zero mode then set the
423| input-denormal exception and return zero. Otherwise just return the value.
424*----------------------------------------------------------------------------*/
425static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
426{
427 if (STATUS(flush_inputs_to_zero)) {
428 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
429 float_raise(float_flag_input_denormal STATUS_VAR);
430 return make_float64(float64_val(a) & (1ULL << 63));
431 }
432 }
433 return a;
434}
435
436/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000437| Normalizes the subnormal double-precision floating-point value represented
438| by the denormalized significand `aSig'. The normalized exponent and
439| significand are stored at the locations pointed to by `zExpPtr' and
440| `zSigPtr', respectively.
441*----------------------------------------------------------------------------*/
442
443static void
Andreas Färberbb98fe42011-03-07 01:34:06 +0100444 normalizeFloat64Subnormal( uint64_t aSig, int16 *zExpPtr, uint64_t *zSigPtr )
bellard158142c2005-03-13 16:54:06 +0000445{
446 int8 shiftCount;
447
448 shiftCount = countLeadingZeros64( aSig ) - 11;
449 *zSigPtr = aSig<<shiftCount;
450 *zExpPtr = 1 - shiftCount;
451
452}
453
454/*----------------------------------------------------------------------------
455| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
456| double-precision floating-point value, returning the result. After being
457| shifted into the proper positions, the three fields are simply added
458| together to form the result. This means that any integer portion of `zSig'
459| will be added into the exponent. Since a properly normalized significand
460| will have an integer portion equal to 1, the `zExp' input should be 1 less
461| than the desired result exponent whenever `zSig' is a complete, normalized
462| significand.
463*----------------------------------------------------------------------------*/
464
Andreas Färberbb98fe42011-03-07 01:34:06 +0100465INLINE float64 packFloat64( flag zSign, int16 zExp, uint64_t zSig )
bellard158142c2005-03-13 16:54:06 +0000466{
467
pbrookf090c9d2007-11-18 14:33:24 +0000468 return make_float64(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100469 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000470
471}
472
473/*----------------------------------------------------------------------------
474| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
475| and significand `zSig', and returns the proper double-precision floating-
476| point value corresponding to the abstract input. Ordinarily, the abstract
477| value is simply rounded and packed into the double-precision format, with
478| the inexact exception raised if the abstract input cannot be represented
479| exactly. However, if the abstract value is too large, the overflow and
480| inexact exceptions are raised and an infinity or maximal finite value is
481| returned. If the abstract value is too small, the input value is rounded
482| to a subnormal number, and the underflow and inexact exceptions are raised
483| if the abstract input cannot be represented exactly as a subnormal double-
484| precision floating-point number.
485| The input significand `zSig' has its binary point between bits 62
486| and 61, which is 10 bits to the left of the usual location. This shifted
487| significand must be normalized or smaller. If `zSig' is not normalized,
488| `zExp' must be 0; in that case, the result returned is a subnormal number,
489| and it must not require rounding. In the usual case that `zSig' is
490| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
491| The handling of underflow and overflow follows the IEC/IEEE Standard for
492| Binary Floating-Point Arithmetic.
493*----------------------------------------------------------------------------*/
494
Andreas Färberbb98fe42011-03-07 01:34:06 +0100495static float64 roundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000496{
497 int8 roundingMode;
498 flag roundNearestEven;
499 int16 roundIncrement, roundBits;
500 flag isTiny;
501
502 roundingMode = STATUS(float_rounding_mode);
503 roundNearestEven = ( roundingMode == float_round_nearest_even );
504 roundIncrement = 0x200;
505 if ( ! roundNearestEven ) {
506 if ( roundingMode == float_round_to_zero ) {
507 roundIncrement = 0;
508 }
509 else {
510 roundIncrement = 0x3FF;
511 if ( zSign ) {
512 if ( roundingMode == float_round_up ) roundIncrement = 0;
513 }
514 else {
515 if ( roundingMode == float_round_down ) roundIncrement = 0;
516 }
517 }
518 }
519 roundBits = zSig & 0x3FF;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100520 if ( 0x7FD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000521 if ( ( 0x7FD < zExp )
522 || ( ( zExp == 0x7FD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100523 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000524 ) {
525 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000526 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000527 }
528 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100529 if (STATUS(flush_to_zero)) {
530 float_raise(float_flag_output_denormal STATUS_VAR);
531 return packFloat64(zSign, 0, 0);
532 }
bellard158142c2005-03-13 16:54:06 +0000533 isTiny =
534 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
535 || ( zExp < -1 )
536 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
537 shift64RightJamming( zSig, - zExp, &zSig );
538 zExp = 0;
539 roundBits = zSig & 0x3FF;
540 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
541 }
542 }
543 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
544 zSig = ( zSig + roundIncrement )>>10;
545 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
546 if ( zSig == 0 ) zExp = 0;
547 return packFloat64( zSign, zExp, zSig );
548
549}
550
551/*----------------------------------------------------------------------------
552| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
553| and significand `zSig', and returns the proper double-precision floating-
554| point value corresponding to the abstract input. This routine is just like
555| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
556| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
557| floating-point exponent.
558*----------------------------------------------------------------------------*/
559
560static float64
Andreas Färberbb98fe42011-03-07 01:34:06 +0100561 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000562{
563 int8 shiftCount;
564
565 shiftCount = countLeadingZeros64( zSig ) - 1;
566 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
567
568}
569
bellard158142c2005-03-13 16:54:06 +0000570/*----------------------------------------------------------------------------
571| Returns the fraction bits of the extended double-precision floating-point
572| value `a'.
573*----------------------------------------------------------------------------*/
574
Andreas Färberbb98fe42011-03-07 01:34:06 +0100575INLINE uint64_t extractFloatx80Frac( floatx80 a )
bellard158142c2005-03-13 16:54:06 +0000576{
577
578 return a.low;
579
580}
581
582/*----------------------------------------------------------------------------
583| Returns the exponent bits of the extended double-precision floating-point
584| value `a'.
585*----------------------------------------------------------------------------*/
586
587INLINE int32 extractFloatx80Exp( floatx80 a )
588{
589
590 return a.high & 0x7FFF;
591
592}
593
594/*----------------------------------------------------------------------------
595| Returns the sign bit of the extended double-precision floating-point value
596| `a'.
597*----------------------------------------------------------------------------*/
598
599INLINE flag extractFloatx80Sign( floatx80 a )
600{
601
602 return a.high>>15;
603
604}
605
606/*----------------------------------------------------------------------------
607| Normalizes the subnormal extended double-precision floating-point value
608| represented by the denormalized significand `aSig'. The normalized exponent
609| and significand are stored at the locations pointed to by `zExpPtr' and
610| `zSigPtr', respectively.
611*----------------------------------------------------------------------------*/
612
613static void
Andreas Färberbb98fe42011-03-07 01:34:06 +0100614 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
bellard158142c2005-03-13 16:54:06 +0000615{
616 int8 shiftCount;
617
618 shiftCount = countLeadingZeros64( aSig );
619 *zSigPtr = aSig<<shiftCount;
620 *zExpPtr = 1 - shiftCount;
621
622}
623
624/*----------------------------------------------------------------------------
625| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
626| extended double-precision floating-point value, returning the result.
627*----------------------------------------------------------------------------*/
628
Andreas Färberbb98fe42011-03-07 01:34:06 +0100629INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard158142c2005-03-13 16:54:06 +0000630{
631 floatx80 z;
632
633 z.low = zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100634 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard158142c2005-03-13 16:54:06 +0000635 return z;
636
637}
638
639/*----------------------------------------------------------------------------
640| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
641| and extended significand formed by the concatenation of `zSig0' and `zSig1',
642| and returns the proper extended double-precision floating-point value
643| corresponding to the abstract input. Ordinarily, the abstract value is
644| rounded and packed into the extended double-precision format, with the
645| inexact exception raised if the abstract input cannot be represented
646| exactly. However, if the abstract value is too large, the overflow and
647| inexact exceptions are raised and an infinity or maximal finite value is
648| returned. If the abstract value is too small, the input value is rounded to
649| a subnormal number, and the underflow and inexact exceptions are raised if
650| the abstract input cannot be represented exactly as a subnormal extended
651| double-precision floating-point number.
652| If `roundingPrecision' is 32 or 64, the result is rounded to the same
653| number of bits as single or double precision, respectively. Otherwise, the
654| result is rounded to the full precision of the extended double-precision
655| format.
656| The input significand must be normalized or smaller. If the input
657| significand is not normalized, `zExp' must be 0; in that case, the result
658| returned is a subnormal number, and it must not require rounding. The
659| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
660| Floating-Point Arithmetic.
661*----------------------------------------------------------------------------*/
662
663static floatx80
664 roundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100665 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000666 STATUS_PARAM)
667{
668 int8 roundingMode;
669 flag roundNearestEven, increment, isTiny;
670 int64 roundIncrement, roundMask, roundBits;
671
672 roundingMode = STATUS(float_rounding_mode);
673 roundNearestEven = ( roundingMode == float_round_nearest_even );
674 if ( roundingPrecision == 80 ) goto precision80;
675 if ( roundingPrecision == 64 ) {
676 roundIncrement = LIT64( 0x0000000000000400 );
677 roundMask = LIT64( 0x00000000000007FF );
678 }
679 else if ( roundingPrecision == 32 ) {
680 roundIncrement = LIT64( 0x0000008000000000 );
681 roundMask = LIT64( 0x000000FFFFFFFFFF );
682 }
683 else {
684 goto precision80;
685 }
686 zSig0 |= ( zSig1 != 0 );
687 if ( ! roundNearestEven ) {
688 if ( roundingMode == float_round_to_zero ) {
689 roundIncrement = 0;
690 }
691 else {
692 roundIncrement = roundMask;
693 if ( zSign ) {
694 if ( roundingMode == float_round_up ) roundIncrement = 0;
695 }
696 else {
697 if ( roundingMode == float_round_down ) roundIncrement = 0;
698 }
699 }
700 }
701 roundBits = zSig0 & roundMask;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100702 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000703 if ( ( 0x7FFE < zExp )
704 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
705 ) {
706 goto overflow;
707 }
708 if ( zExp <= 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100709 if (STATUS(flush_to_zero)) {
710 float_raise(float_flag_output_denormal STATUS_VAR);
711 return packFloatx80(zSign, 0, 0);
712 }
bellard158142c2005-03-13 16:54:06 +0000713 isTiny =
714 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
715 || ( zExp < 0 )
716 || ( zSig0 <= zSig0 + roundIncrement );
717 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
718 zExp = 0;
719 roundBits = zSig0 & roundMask;
720 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
721 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
722 zSig0 += roundIncrement;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100723 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000724 roundIncrement = roundMask + 1;
725 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
726 roundMask |= roundIncrement;
727 }
728 zSig0 &= ~ roundMask;
729 return packFloatx80( zSign, zExp, zSig0 );
730 }
731 }
732 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
733 zSig0 += roundIncrement;
734 if ( zSig0 < roundIncrement ) {
735 ++zExp;
736 zSig0 = LIT64( 0x8000000000000000 );
737 }
738 roundIncrement = roundMask + 1;
739 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
740 roundMask |= roundIncrement;
741 }
742 zSig0 &= ~ roundMask;
743 if ( zSig0 == 0 ) zExp = 0;
744 return packFloatx80( zSign, zExp, zSig0 );
745 precision80:
Andreas Färberbb98fe42011-03-07 01:34:06 +0100746 increment = ( (int64_t) zSig1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000747 if ( ! roundNearestEven ) {
748 if ( roundingMode == float_round_to_zero ) {
749 increment = 0;
750 }
751 else {
752 if ( zSign ) {
753 increment = ( roundingMode == float_round_down ) && zSig1;
754 }
755 else {
756 increment = ( roundingMode == float_round_up ) && zSig1;
757 }
758 }
759 }
Andreas Färberbb98fe42011-03-07 01:34:06 +0100760 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000761 if ( ( 0x7FFE < zExp )
762 || ( ( zExp == 0x7FFE )
763 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
764 && increment
765 )
766 ) {
767 roundMask = 0;
768 overflow:
769 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
770 if ( ( roundingMode == float_round_to_zero )
771 || ( zSign && ( roundingMode == float_round_up ) )
772 || ( ! zSign && ( roundingMode == float_round_down ) )
773 ) {
774 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
775 }
776 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
777 }
778 if ( zExp <= 0 ) {
779 isTiny =
780 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
781 || ( zExp < 0 )
782 || ! increment
783 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
784 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
785 zExp = 0;
786 if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
787 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
788 if ( roundNearestEven ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +0100789 increment = ( (int64_t) zSig1 < 0 );
bellard158142c2005-03-13 16:54:06 +0000790 }
791 else {
792 if ( zSign ) {
793 increment = ( roundingMode == float_round_down ) && zSig1;
794 }
795 else {
796 increment = ( roundingMode == float_round_up ) && zSig1;
797 }
798 }
799 if ( increment ) {
800 ++zSig0;
801 zSig0 &=
Andreas Färberbb98fe42011-03-07 01:34:06 +0100802 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
803 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000804 }
805 return packFloatx80( zSign, zExp, zSig0 );
806 }
807 }
808 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
809 if ( increment ) {
810 ++zSig0;
811 if ( zSig0 == 0 ) {
812 ++zExp;
813 zSig0 = LIT64( 0x8000000000000000 );
814 }
815 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +0100816 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000817 }
818 }
819 else {
820 if ( zSig0 == 0 ) zExp = 0;
821 }
822 return packFloatx80( zSign, zExp, zSig0 );
823
824}
825
826/*----------------------------------------------------------------------------
827| Takes an abstract floating-point value having sign `zSign', exponent
828| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
829| and returns the proper extended double-precision floating-point value
830| corresponding to the abstract input. This routine is just like
831| `roundAndPackFloatx80' except that the input significand does not have to be
832| normalized.
833*----------------------------------------------------------------------------*/
834
835static floatx80
836 normalizeRoundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100837 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000838 STATUS_PARAM)
839{
840 int8 shiftCount;
841
842 if ( zSig0 == 0 ) {
843 zSig0 = zSig1;
844 zSig1 = 0;
845 zExp -= 64;
846 }
847 shiftCount = countLeadingZeros64( zSig0 );
848 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
849 zExp -= shiftCount;
850 return
851 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
852
853}
854
bellard158142c2005-03-13 16:54:06 +0000855/*----------------------------------------------------------------------------
856| Returns the least-significant 64 fraction bits of the quadruple-precision
857| floating-point value `a'.
858*----------------------------------------------------------------------------*/
859
Andreas Färberbb98fe42011-03-07 01:34:06 +0100860INLINE uint64_t extractFloat128Frac1( float128 a )
bellard158142c2005-03-13 16:54:06 +0000861{
862
863 return a.low;
864
865}
866
867/*----------------------------------------------------------------------------
868| Returns the most-significant 48 fraction bits of the quadruple-precision
869| floating-point value `a'.
870*----------------------------------------------------------------------------*/
871
Andreas Färberbb98fe42011-03-07 01:34:06 +0100872INLINE uint64_t extractFloat128Frac0( float128 a )
bellard158142c2005-03-13 16:54:06 +0000873{
874
875 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
876
877}
878
879/*----------------------------------------------------------------------------
880| Returns the exponent bits of the quadruple-precision floating-point value
881| `a'.
882*----------------------------------------------------------------------------*/
883
884INLINE int32 extractFloat128Exp( float128 a )
885{
886
887 return ( a.high>>48 ) & 0x7FFF;
888
889}
890
891/*----------------------------------------------------------------------------
892| Returns the sign bit of the quadruple-precision floating-point value `a'.
893*----------------------------------------------------------------------------*/
894
895INLINE flag extractFloat128Sign( float128 a )
896{
897
898 return a.high>>63;
899
900}
901
902/*----------------------------------------------------------------------------
903| Normalizes the subnormal quadruple-precision floating-point value
904| represented by the denormalized significand formed by the concatenation of
905| `aSig0' and `aSig1'. The normalized exponent is stored at the location
906| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
907| significand are stored at the location pointed to by `zSig0Ptr', and the
908| least significant 64 bits of the normalized significand are stored at the
909| location pointed to by `zSig1Ptr'.
910*----------------------------------------------------------------------------*/
911
912static void
913 normalizeFloat128Subnormal(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100914 uint64_t aSig0,
915 uint64_t aSig1,
bellard158142c2005-03-13 16:54:06 +0000916 int32 *zExpPtr,
Andreas Färberbb98fe42011-03-07 01:34:06 +0100917 uint64_t *zSig0Ptr,
918 uint64_t *zSig1Ptr
bellard158142c2005-03-13 16:54:06 +0000919 )
920{
921 int8 shiftCount;
922
923 if ( aSig0 == 0 ) {
924 shiftCount = countLeadingZeros64( aSig1 ) - 15;
925 if ( shiftCount < 0 ) {
926 *zSig0Ptr = aSig1>>( - shiftCount );
927 *zSig1Ptr = aSig1<<( shiftCount & 63 );
928 }
929 else {
930 *zSig0Ptr = aSig1<<shiftCount;
931 *zSig1Ptr = 0;
932 }
933 *zExpPtr = - shiftCount - 63;
934 }
935 else {
936 shiftCount = countLeadingZeros64( aSig0 ) - 15;
937 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
938 *zExpPtr = 1 - shiftCount;
939 }
940
941}
942
943/*----------------------------------------------------------------------------
944| Packs the sign `zSign', the exponent `zExp', and the significand formed
945| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
946| floating-point value, returning the result. After being shifted into the
947| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
948| added together to form the most significant 32 bits of the result. This
949| means that any integer portion of `zSig0' will be added into the exponent.
950| Since a properly normalized significand will have an integer portion equal
951| to 1, the `zExp' input should be 1 less than the desired result exponent
952| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
953| significand.
954*----------------------------------------------------------------------------*/
955
956INLINE float128
Andreas Färberbb98fe42011-03-07 01:34:06 +0100957 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard158142c2005-03-13 16:54:06 +0000958{
959 float128 z;
960
961 z.low = zSig1;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100962 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard158142c2005-03-13 16:54:06 +0000963 return z;
964
965}
966
967/*----------------------------------------------------------------------------
968| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
969| and extended significand formed by the concatenation of `zSig0', `zSig1',
970| and `zSig2', and returns the proper quadruple-precision floating-point value
971| corresponding to the abstract input. Ordinarily, the abstract value is
972| simply rounded and packed into the quadruple-precision format, with the
973| inexact exception raised if the abstract input cannot be represented
974| exactly. However, if the abstract value is too large, the overflow and
975| inexact exceptions are raised and an infinity or maximal finite value is
976| returned. If the abstract value is too small, the input value is rounded to
977| a subnormal number, and the underflow and inexact exceptions are raised if
978| the abstract input cannot be represented exactly as a subnormal quadruple-
979| precision floating-point number.
980| The input significand must be normalized or smaller. If the input
981| significand is not normalized, `zExp' must be 0; in that case, the result
982| returned is a subnormal number, and it must not require rounding. In the
983| usual case that the input significand is normalized, `zExp' must be 1 less
984| than the ``true'' floating-point exponent. The handling of underflow and
985| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
986*----------------------------------------------------------------------------*/
987
988static float128
989 roundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100990 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000991{
992 int8 roundingMode;
993 flag roundNearestEven, increment, isTiny;
994
995 roundingMode = STATUS(float_rounding_mode);
996 roundNearestEven = ( roundingMode == float_round_nearest_even );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100997 increment = ( (int64_t) zSig2 < 0 );
bellard158142c2005-03-13 16:54:06 +0000998 if ( ! roundNearestEven ) {
999 if ( roundingMode == float_round_to_zero ) {
1000 increment = 0;
1001 }
1002 else {
1003 if ( zSign ) {
1004 increment = ( roundingMode == float_round_down ) && zSig2;
1005 }
1006 else {
1007 increment = ( roundingMode == float_round_up ) && zSig2;
1008 }
1009 }
1010 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001011 if ( 0x7FFD <= (uint32_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +00001012 if ( ( 0x7FFD < zExp )
1013 || ( ( zExp == 0x7FFD )
1014 && eq128(
1015 LIT64( 0x0001FFFFFFFFFFFF ),
1016 LIT64( 0xFFFFFFFFFFFFFFFF ),
1017 zSig0,
1018 zSig1
1019 )
1020 && increment
1021 )
1022 ) {
1023 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1024 if ( ( roundingMode == float_round_to_zero )
1025 || ( zSign && ( roundingMode == float_round_up ) )
1026 || ( ! zSign && ( roundingMode == float_round_down ) )
1027 ) {
1028 return
1029 packFloat128(
1030 zSign,
1031 0x7FFE,
1032 LIT64( 0x0000FFFFFFFFFFFF ),
1033 LIT64( 0xFFFFFFFFFFFFFFFF )
1034 );
1035 }
1036 return packFloat128( zSign, 0x7FFF, 0, 0 );
1037 }
1038 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01001039 if (STATUS(flush_to_zero)) {
1040 float_raise(float_flag_output_denormal STATUS_VAR);
1041 return packFloat128(zSign, 0, 0, 0);
1042 }
bellard158142c2005-03-13 16:54:06 +00001043 isTiny =
1044 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1045 || ( zExp < -1 )
1046 || ! increment
1047 || lt128(
1048 zSig0,
1049 zSig1,
1050 LIT64( 0x0001FFFFFFFFFFFF ),
1051 LIT64( 0xFFFFFFFFFFFFFFFF )
1052 );
1053 shift128ExtraRightJamming(
1054 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1055 zExp = 0;
1056 if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
1057 if ( roundNearestEven ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01001058 increment = ( (int64_t) zSig2 < 0 );
bellard158142c2005-03-13 16:54:06 +00001059 }
1060 else {
1061 if ( zSign ) {
1062 increment = ( roundingMode == float_round_down ) && zSig2;
1063 }
1064 else {
1065 increment = ( roundingMode == float_round_up ) && zSig2;
1066 }
1067 }
1068 }
1069 }
1070 if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1071 if ( increment ) {
1072 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1073 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1074 }
1075 else {
1076 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1077 }
1078 return packFloat128( zSign, zExp, zSig0, zSig1 );
1079
1080}
1081
1082/*----------------------------------------------------------------------------
1083| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1084| and significand formed by the concatenation of `zSig0' and `zSig1', and
1085| returns the proper quadruple-precision floating-point value corresponding
1086| to the abstract input. This routine is just like `roundAndPackFloat128'
1087| except that the input significand has fewer bits and does not have to be
1088| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1089| point exponent.
1090*----------------------------------------------------------------------------*/
1091
1092static float128
1093 normalizeRoundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01001094 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001095{
1096 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001097 uint64_t zSig2;
bellard158142c2005-03-13 16:54:06 +00001098
1099 if ( zSig0 == 0 ) {
1100 zSig0 = zSig1;
1101 zSig1 = 0;
1102 zExp -= 64;
1103 }
1104 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1105 if ( 0 <= shiftCount ) {
1106 zSig2 = 0;
1107 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1108 }
1109 else {
1110 shift128ExtraRightJamming(
1111 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1112 }
1113 zExp -= shiftCount;
1114 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1115
1116}
1117
bellard158142c2005-03-13 16:54:06 +00001118/*----------------------------------------------------------------------------
1119| Returns the result of converting the 32-bit two's complement integer `a'
1120| to the single-precision floating-point format. The conversion is performed
1121| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1122*----------------------------------------------------------------------------*/
1123
1124float32 int32_to_float32( int32 a STATUS_PARAM )
1125{
1126 flag zSign;
1127
pbrookf090c9d2007-11-18 14:33:24 +00001128 if ( a == 0 ) return float32_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001129 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard158142c2005-03-13 16:54:06 +00001130 zSign = ( a < 0 );
1131 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1132
1133}
1134
1135/*----------------------------------------------------------------------------
1136| Returns the result of converting the 32-bit two's complement integer `a'
1137| to the double-precision floating-point format. The conversion is performed
1138| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1139*----------------------------------------------------------------------------*/
1140
1141float64 int32_to_float64( int32 a STATUS_PARAM )
1142{
1143 flag zSign;
1144 uint32 absA;
1145 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001146 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001147
pbrookf090c9d2007-11-18 14:33:24 +00001148 if ( a == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00001149 zSign = ( a < 0 );
1150 absA = zSign ? - a : a;
1151 shiftCount = countLeadingZeros32( absA ) + 21;
1152 zSig = absA;
1153 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1154
1155}
1156
bellard158142c2005-03-13 16:54:06 +00001157/*----------------------------------------------------------------------------
1158| Returns the result of converting the 32-bit two's complement integer `a'
1159| to the extended double-precision floating-point format. The conversion
1160| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1161| Arithmetic.
1162*----------------------------------------------------------------------------*/
1163
1164floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
1165{
1166 flag zSign;
1167 uint32 absA;
1168 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001169 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001170
1171 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1172 zSign = ( a < 0 );
1173 absA = zSign ? - a : a;
1174 shiftCount = countLeadingZeros32( absA ) + 32;
1175 zSig = absA;
1176 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1177
1178}
1179
bellard158142c2005-03-13 16:54:06 +00001180/*----------------------------------------------------------------------------
1181| Returns the result of converting the 32-bit two's complement integer `a' to
1182| the quadruple-precision floating-point format. The conversion is performed
1183| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1184*----------------------------------------------------------------------------*/
1185
1186float128 int32_to_float128( int32 a STATUS_PARAM )
1187{
1188 flag zSign;
1189 uint32 absA;
1190 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001191 uint64_t zSig0;
bellard158142c2005-03-13 16:54:06 +00001192
1193 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1194 zSign = ( a < 0 );
1195 absA = zSign ? - a : a;
1196 shiftCount = countLeadingZeros32( absA ) + 17;
1197 zSig0 = absA;
1198 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1199
1200}
1201
bellard158142c2005-03-13 16:54:06 +00001202/*----------------------------------------------------------------------------
1203| Returns the result of converting the 64-bit two's complement integer `a'
1204| to the single-precision floating-point format. The conversion is performed
1205| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1206*----------------------------------------------------------------------------*/
1207
1208float32 int64_to_float32( int64 a STATUS_PARAM )
1209{
1210 flag zSign;
1211 uint64 absA;
1212 int8 shiftCount;
1213
pbrookf090c9d2007-11-18 14:33:24 +00001214 if ( a == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00001215 zSign = ( a < 0 );
1216 absA = zSign ? - a : a;
1217 shiftCount = countLeadingZeros64( absA ) - 40;
1218 if ( 0 <= shiftCount ) {
1219 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1220 }
1221 else {
1222 shiftCount += 7;
1223 if ( shiftCount < 0 ) {
1224 shift64RightJamming( absA, - shiftCount, &absA );
1225 }
1226 else {
1227 absA <<= shiftCount;
1228 }
1229 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1230 }
1231
1232}
1233
j_mayer3430b0b2007-03-20 22:25:37 +00001234float32 uint64_to_float32( uint64 a STATUS_PARAM )
j_mayer75d62a52007-03-20 22:10:42 +00001235{
1236 int8 shiftCount;
1237
pbrookf090c9d2007-11-18 14:33:24 +00001238 if ( a == 0 ) return float32_zero;
j_mayer75d62a52007-03-20 22:10:42 +00001239 shiftCount = countLeadingZeros64( a ) - 40;
1240 if ( 0 <= shiftCount ) {
1241 return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
1242 }
1243 else {
1244 shiftCount += 7;
1245 if ( shiftCount < 0 ) {
1246 shift64RightJamming( a, - shiftCount, &a );
1247 }
1248 else {
1249 a <<= shiftCount;
1250 }
1251 return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
1252 }
1253}
1254
bellard158142c2005-03-13 16:54:06 +00001255/*----------------------------------------------------------------------------
1256| Returns the result of converting the 64-bit two's complement integer `a'
1257| to the double-precision floating-point format. The conversion is performed
1258| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259*----------------------------------------------------------------------------*/
1260
1261float64 int64_to_float64( int64 a STATUS_PARAM )
1262{
1263 flag zSign;
1264
pbrookf090c9d2007-11-18 14:33:24 +00001265 if ( a == 0 ) return float64_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001266 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00001267 return packFloat64( 1, 0x43E, 0 );
1268 }
1269 zSign = ( a < 0 );
1270 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1271
1272}
1273
j_mayer75d62a52007-03-20 22:10:42 +00001274float64 uint64_to_float64( uint64 a STATUS_PARAM )
1275{
pbrookf090c9d2007-11-18 14:33:24 +00001276 if ( a == 0 ) return float64_zero;
j_mayer75d62a52007-03-20 22:10:42 +00001277 return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
1278
1279}
1280
bellard158142c2005-03-13 16:54:06 +00001281/*----------------------------------------------------------------------------
1282| Returns the result of converting the 64-bit two's complement integer `a'
1283| to the extended double-precision floating-point format. The conversion
1284| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1285| Arithmetic.
1286*----------------------------------------------------------------------------*/
1287
1288floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
1289{
1290 flag zSign;
1291 uint64 absA;
1292 int8 shiftCount;
1293
1294 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1295 zSign = ( a < 0 );
1296 absA = zSign ? - a : a;
1297 shiftCount = countLeadingZeros64( absA );
1298 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1299
1300}
1301
bellard158142c2005-03-13 16:54:06 +00001302/*----------------------------------------------------------------------------
1303| Returns the result of converting the 64-bit two's complement integer `a' to
1304| the quadruple-precision floating-point format. The conversion is performed
1305| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1306*----------------------------------------------------------------------------*/
1307
1308float128 int64_to_float128( int64 a STATUS_PARAM )
1309{
1310 flag zSign;
1311 uint64 absA;
1312 int8 shiftCount;
1313 int32 zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001314 uint64_t zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00001315
1316 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1317 zSign = ( a < 0 );
1318 absA = zSign ? - a : a;
1319 shiftCount = countLeadingZeros64( absA ) + 49;
1320 zExp = 0x406E - shiftCount;
1321 if ( 64 <= shiftCount ) {
1322 zSig1 = 0;
1323 zSig0 = absA;
1324 shiftCount -= 64;
1325 }
1326 else {
1327 zSig1 = absA;
1328 zSig0 = 0;
1329 }
1330 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1331 return packFloat128( zSign, zExp, zSig0, zSig1 );
1332
1333}
1334
bellard158142c2005-03-13 16:54:06 +00001335/*----------------------------------------------------------------------------
1336| Returns the result of converting the single-precision floating-point value
1337| `a' to the 32-bit two's complement integer format. The conversion is
1338| performed according to the IEC/IEEE Standard for Binary Floating-Point
1339| Arithmetic---which means in particular that the conversion is rounded
1340| according to the current rounding mode. If `a' is a NaN, the largest
1341| positive integer is returned. Otherwise, if the conversion overflows, the
1342| largest integer with the same sign as `a' is returned.
1343*----------------------------------------------------------------------------*/
1344
1345int32 float32_to_int32( float32 a STATUS_PARAM )
1346{
1347 flag aSign;
1348 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001349 uint32_t aSig;
1350 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001351
Peter Maydell37d18662011-01-06 19:37:53 +00001352 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001353 aSig = extractFloat32Frac( a );
1354 aExp = extractFloat32Exp( a );
1355 aSign = extractFloat32Sign( a );
1356 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1357 if ( aExp ) aSig |= 0x00800000;
1358 shiftCount = 0xAF - aExp;
1359 aSig64 = aSig;
1360 aSig64 <<= 32;
1361 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1362 return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1363
1364}
1365
1366/*----------------------------------------------------------------------------
1367| Returns the result of converting the single-precision floating-point value
1368| `a' to the 32-bit two's complement integer format. The conversion is
1369| performed according to the IEC/IEEE Standard for Binary Floating-Point
1370| Arithmetic, except that the conversion is always rounded toward zero.
1371| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1372| the conversion overflows, the largest integer with the same sign as `a' is
1373| returned.
1374*----------------------------------------------------------------------------*/
1375
1376int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1377{
1378 flag aSign;
1379 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001380 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001381 int32 z;
Peter Maydell37d18662011-01-06 19:37:53 +00001382 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001383
1384 aSig = extractFloat32Frac( a );
1385 aExp = extractFloat32Exp( a );
1386 aSign = extractFloat32Sign( a );
1387 shiftCount = aExp - 0x9E;
1388 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001389 if ( float32_val(a) != 0xCF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001390 float_raise( float_flag_invalid STATUS_VAR);
1391 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1392 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001393 return (int32_t) 0x80000000;
bellard158142c2005-03-13 16:54:06 +00001394 }
1395 else if ( aExp <= 0x7E ) {
1396 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1397 return 0;
1398 }
1399 aSig = ( aSig | 0x00800000 )<<8;
1400 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001401 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001402 STATUS(float_exception_flags) |= float_flag_inexact;
1403 }
1404 if ( aSign ) z = - z;
1405 return z;
1406
1407}
1408
1409/*----------------------------------------------------------------------------
1410| Returns the result of converting the single-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00001411| `a' to the 16-bit two's complement integer format. The conversion is
1412| performed according to the IEC/IEEE Standard for Binary Floating-Point
1413| Arithmetic, except that the conversion is always rounded toward zero.
1414| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1415| the conversion overflows, the largest integer with the same sign as `a' is
1416| returned.
1417*----------------------------------------------------------------------------*/
1418
1419int16 float32_to_int16_round_to_zero( float32 a STATUS_PARAM )
1420{
1421 flag aSign;
1422 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001423 uint32_t aSig;
Peter Maydellcbcef452010-12-07 15:37:34 +00001424 int32 z;
1425
1426 aSig = extractFloat32Frac( a );
1427 aExp = extractFloat32Exp( a );
1428 aSign = extractFloat32Sign( a );
1429 shiftCount = aExp - 0x8E;
1430 if ( 0 <= shiftCount ) {
1431 if ( float32_val(a) != 0xC7000000 ) {
1432 float_raise( float_flag_invalid STATUS_VAR);
1433 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1434 return 0x7FFF;
1435 }
1436 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001437 return (int32_t) 0xffff8000;
Peter Maydellcbcef452010-12-07 15:37:34 +00001438 }
1439 else if ( aExp <= 0x7E ) {
1440 if ( aExp | aSig ) {
1441 STATUS(float_exception_flags) |= float_flag_inexact;
1442 }
1443 return 0;
1444 }
1445 shiftCount -= 0x10;
1446 aSig = ( aSig | 0x00800000 )<<8;
1447 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001448 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydellcbcef452010-12-07 15:37:34 +00001449 STATUS(float_exception_flags) |= float_flag_inexact;
1450 }
1451 if ( aSign ) {
1452 z = - z;
1453 }
1454 return z;
1455
1456}
1457
1458/*----------------------------------------------------------------------------
1459| Returns the result of converting the single-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00001460| `a' to the 64-bit two's complement integer format. The conversion is
1461| performed according to the IEC/IEEE Standard for Binary Floating-Point
1462| Arithmetic---which means in particular that the conversion is rounded
1463| according to the current rounding mode. If `a' is a NaN, the largest
1464| positive integer is returned. Otherwise, if the conversion overflows, the
1465| largest integer with the same sign as `a' is returned.
1466*----------------------------------------------------------------------------*/
1467
1468int64 float32_to_int64( float32 a STATUS_PARAM )
1469{
1470 flag aSign;
1471 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001472 uint32_t aSig;
1473 uint64_t aSig64, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00001474 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001475
1476 aSig = extractFloat32Frac( a );
1477 aExp = extractFloat32Exp( a );
1478 aSign = extractFloat32Sign( a );
1479 shiftCount = 0xBE - aExp;
1480 if ( shiftCount < 0 ) {
1481 float_raise( float_flag_invalid STATUS_VAR);
1482 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1483 return LIT64( 0x7FFFFFFFFFFFFFFF );
1484 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001485 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001486 }
1487 if ( aExp ) aSig |= 0x00800000;
1488 aSig64 = aSig;
1489 aSig64 <<= 40;
1490 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1491 return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1492
1493}
1494
1495/*----------------------------------------------------------------------------
1496| Returns the result of converting the single-precision floating-point value
1497| `a' to the 64-bit two's complement integer format. The conversion is
1498| performed according to the IEC/IEEE Standard for Binary Floating-Point
1499| Arithmetic, except that the conversion is always rounded toward zero. If
1500| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1501| conversion overflows, the largest integer with the same sign as `a' is
1502| returned.
1503*----------------------------------------------------------------------------*/
1504
1505int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1506{
1507 flag aSign;
1508 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001509 uint32_t aSig;
1510 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001511 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00001512 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001513
1514 aSig = extractFloat32Frac( a );
1515 aExp = extractFloat32Exp( a );
1516 aSign = extractFloat32Sign( a );
1517 shiftCount = aExp - 0xBE;
1518 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001519 if ( float32_val(a) != 0xDF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001520 float_raise( float_flag_invalid STATUS_VAR);
1521 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1522 return LIT64( 0x7FFFFFFFFFFFFFFF );
1523 }
1524 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001525 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001526 }
1527 else if ( aExp <= 0x7E ) {
1528 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1529 return 0;
1530 }
1531 aSig64 = aSig | 0x00800000;
1532 aSig64 <<= 40;
1533 z = aSig64>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001534 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001535 STATUS(float_exception_flags) |= float_flag_inexact;
1536 }
1537 if ( aSign ) z = - z;
1538 return z;
1539
1540}
1541
1542/*----------------------------------------------------------------------------
1543| Returns the result of converting the single-precision floating-point value
1544| `a' to the double-precision floating-point format. The conversion is
1545| performed according to the IEC/IEEE Standard for Binary Floating-Point
1546| Arithmetic.
1547*----------------------------------------------------------------------------*/
1548
1549float64 float32_to_float64( float32 a STATUS_PARAM )
1550{
1551 flag aSign;
1552 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001553 uint32_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00001554 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001555
1556 aSig = extractFloat32Frac( a );
1557 aExp = extractFloat32Exp( a );
1558 aSign = extractFloat32Sign( a );
1559 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001560 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001561 return packFloat64( aSign, 0x7FF, 0 );
1562 }
1563 if ( aExp == 0 ) {
1564 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1565 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1566 --aExp;
1567 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001568 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard158142c2005-03-13 16:54:06 +00001569
1570}
1571
bellard158142c2005-03-13 16:54:06 +00001572/*----------------------------------------------------------------------------
1573| Returns the result of converting the single-precision floating-point value
1574| `a' to the extended double-precision floating-point format. The conversion
1575| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1576| Arithmetic.
1577*----------------------------------------------------------------------------*/
1578
1579floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1580{
1581 flag aSign;
1582 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001583 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001584
Peter Maydell37d18662011-01-06 19:37:53 +00001585 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001586 aSig = extractFloat32Frac( a );
1587 aExp = extractFloat32Exp( a );
1588 aSign = extractFloat32Sign( a );
1589 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001590 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001591 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1592 }
1593 if ( aExp == 0 ) {
1594 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1595 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1596 }
1597 aSig |= 0x00800000;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001598 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard158142c2005-03-13 16:54:06 +00001599
1600}
1601
bellard158142c2005-03-13 16:54:06 +00001602/*----------------------------------------------------------------------------
1603| Returns the result of converting the single-precision floating-point value
1604| `a' to the double-precision floating-point format. The conversion is
1605| performed according to the IEC/IEEE Standard for Binary Floating-Point
1606| Arithmetic.
1607*----------------------------------------------------------------------------*/
1608
1609float128 float32_to_float128( float32 a STATUS_PARAM )
1610{
1611 flag aSign;
1612 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001613 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001614
Peter Maydell37d18662011-01-06 19:37:53 +00001615 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001616 aSig = extractFloat32Frac( a );
1617 aExp = extractFloat32Exp( a );
1618 aSign = extractFloat32Sign( a );
1619 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001620 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001621 return packFloat128( aSign, 0x7FFF, 0, 0 );
1622 }
1623 if ( aExp == 0 ) {
1624 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1625 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1626 --aExp;
1627 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001628 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard158142c2005-03-13 16:54:06 +00001629
1630}
1631
bellard158142c2005-03-13 16:54:06 +00001632/*----------------------------------------------------------------------------
1633| Rounds the single-precision floating-point value `a' to an integer, and
1634| returns the result as a single-precision floating-point value. The
1635| operation is performed according to the IEC/IEEE Standard for Binary
1636| Floating-Point Arithmetic.
1637*----------------------------------------------------------------------------*/
1638
1639float32 float32_round_to_int( float32 a STATUS_PARAM)
1640{
1641 flag aSign;
1642 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001643 uint32_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00001644 int8 roundingMode;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001645 uint32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00001646 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001647
1648 aExp = extractFloat32Exp( a );
1649 if ( 0x96 <= aExp ) {
1650 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1651 return propagateFloat32NaN( a, a STATUS_VAR );
1652 }
1653 return a;
1654 }
1655 if ( aExp <= 0x7E ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01001656 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00001657 STATUS(float_exception_flags) |= float_flag_inexact;
1658 aSign = extractFloat32Sign( a );
1659 switch ( STATUS(float_rounding_mode) ) {
1660 case float_round_nearest_even:
1661 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1662 return packFloat32( aSign, 0x7F, 0 );
1663 }
1664 break;
1665 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00001666 return make_float32(aSign ? 0xBF800000 : 0);
bellard158142c2005-03-13 16:54:06 +00001667 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00001668 return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard158142c2005-03-13 16:54:06 +00001669 }
1670 return packFloat32( aSign, 0, 0 );
1671 }
1672 lastBitMask = 1;
1673 lastBitMask <<= 0x96 - aExp;
1674 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00001675 z = float32_val(a);
bellard158142c2005-03-13 16:54:06 +00001676 roundingMode = STATUS(float_rounding_mode);
1677 if ( roundingMode == float_round_nearest_even ) {
1678 z += lastBitMask>>1;
1679 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1680 }
1681 else if ( roundingMode != float_round_to_zero ) {
pbrookf090c9d2007-11-18 14:33:24 +00001682 if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard158142c2005-03-13 16:54:06 +00001683 z += roundBitsMask;
1684 }
1685 }
1686 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00001687 if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1688 return make_float32(z);
bellard158142c2005-03-13 16:54:06 +00001689
1690}
1691
1692/*----------------------------------------------------------------------------
1693| Returns the result of adding the absolute values of the single-precision
1694| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1695| before being returned. `zSign' is ignored if the result is a NaN.
1696| The addition is performed according to the IEC/IEEE Standard for Binary
1697| Floating-Point Arithmetic.
1698*----------------------------------------------------------------------------*/
1699
1700static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1701{
1702 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001703 uint32_t aSig, bSig, zSig;
bellard158142c2005-03-13 16:54:06 +00001704 int16 expDiff;
1705
1706 aSig = extractFloat32Frac( a );
1707 aExp = extractFloat32Exp( a );
1708 bSig = extractFloat32Frac( b );
1709 bExp = extractFloat32Exp( b );
1710 expDiff = aExp - bExp;
1711 aSig <<= 6;
1712 bSig <<= 6;
1713 if ( 0 < expDiff ) {
1714 if ( aExp == 0xFF ) {
1715 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1716 return a;
1717 }
1718 if ( bExp == 0 ) {
1719 --expDiff;
1720 }
1721 else {
1722 bSig |= 0x20000000;
1723 }
1724 shift32RightJamming( bSig, expDiff, &bSig );
1725 zExp = aExp;
1726 }
1727 else if ( expDiff < 0 ) {
1728 if ( bExp == 0xFF ) {
1729 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1730 return packFloat32( zSign, 0xFF, 0 );
1731 }
1732 if ( aExp == 0 ) {
1733 ++expDiff;
1734 }
1735 else {
1736 aSig |= 0x20000000;
1737 }
1738 shift32RightJamming( aSig, - expDiff, &aSig );
1739 zExp = bExp;
1740 }
1741 else {
1742 if ( aExp == 0xFF ) {
1743 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1744 return a;
1745 }
pbrookfe76d972008-12-19 14:33:59 +00001746 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01001747 if (STATUS(flush_to_zero)) {
1748 if (aSig | bSig) {
1749 float_raise(float_flag_output_denormal STATUS_VAR);
1750 }
1751 return packFloat32(zSign, 0, 0);
1752 }
pbrookfe76d972008-12-19 14:33:59 +00001753 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1754 }
bellard158142c2005-03-13 16:54:06 +00001755 zSig = 0x40000000 + aSig + bSig;
1756 zExp = aExp;
1757 goto roundAndPack;
1758 }
1759 aSig |= 0x20000000;
1760 zSig = ( aSig + bSig )<<1;
1761 --zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001762 if ( (int32_t) zSig < 0 ) {
bellard158142c2005-03-13 16:54:06 +00001763 zSig = aSig + bSig;
1764 ++zExp;
1765 }
1766 roundAndPack:
1767 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1768
1769}
1770
1771/*----------------------------------------------------------------------------
1772| Returns the result of subtracting the absolute values of the single-
1773| precision floating-point values `a' and `b'. If `zSign' is 1, the
1774| difference is negated before being returned. `zSign' is ignored if the
1775| result is a NaN. The subtraction is performed according to the IEC/IEEE
1776| Standard for Binary Floating-Point Arithmetic.
1777*----------------------------------------------------------------------------*/
1778
1779static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1780{
1781 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001782 uint32_t aSig, bSig, zSig;
bellard158142c2005-03-13 16:54:06 +00001783 int16 expDiff;
1784
1785 aSig = extractFloat32Frac( a );
1786 aExp = extractFloat32Exp( a );
1787 bSig = extractFloat32Frac( b );
1788 bExp = extractFloat32Exp( b );
1789 expDiff = aExp - bExp;
1790 aSig <<= 7;
1791 bSig <<= 7;
1792 if ( 0 < expDiff ) goto aExpBigger;
1793 if ( expDiff < 0 ) goto bExpBigger;
1794 if ( aExp == 0xFF ) {
1795 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1796 float_raise( float_flag_invalid STATUS_VAR);
1797 return float32_default_nan;
1798 }
1799 if ( aExp == 0 ) {
1800 aExp = 1;
1801 bExp = 1;
1802 }
1803 if ( bSig < aSig ) goto aBigger;
1804 if ( aSig < bSig ) goto bBigger;
1805 return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1806 bExpBigger:
1807 if ( bExp == 0xFF ) {
1808 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1809 return packFloat32( zSign ^ 1, 0xFF, 0 );
1810 }
1811 if ( aExp == 0 ) {
1812 ++expDiff;
1813 }
1814 else {
1815 aSig |= 0x40000000;
1816 }
1817 shift32RightJamming( aSig, - expDiff, &aSig );
1818 bSig |= 0x40000000;
1819 bBigger:
1820 zSig = bSig - aSig;
1821 zExp = bExp;
1822 zSign ^= 1;
1823 goto normalizeRoundAndPack;
1824 aExpBigger:
1825 if ( aExp == 0xFF ) {
1826 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1827 return a;
1828 }
1829 if ( bExp == 0 ) {
1830 --expDiff;
1831 }
1832 else {
1833 bSig |= 0x40000000;
1834 }
1835 shift32RightJamming( bSig, expDiff, &bSig );
1836 aSig |= 0x40000000;
1837 aBigger:
1838 zSig = aSig - bSig;
1839 zExp = aExp;
1840 normalizeRoundAndPack:
1841 --zExp;
1842 return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1843
1844}
1845
1846/*----------------------------------------------------------------------------
1847| Returns the result of adding the single-precision floating-point values `a'
1848| and `b'. The operation is performed according to the IEC/IEEE Standard for
1849| Binary Floating-Point Arithmetic.
1850*----------------------------------------------------------------------------*/
1851
1852float32 float32_add( float32 a, float32 b STATUS_PARAM )
1853{
1854 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00001855 a = float32_squash_input_denormal(a STATUS_VAR);
1856 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001857
1858 aSign = extractFloat32Sign( a );
1859 bSign = extractFloat32Sign( b );
1860 if ( aSign == bSign ) {
1861 return addFloat32Sigs( a, b, aSign STATUS_VAR);
1862 }
1863 else {
1864 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1865 }
1866
1867}
1868
1869/*----------------------------------------------------------------------------
1870| Returns the result of subtracting the single-precision floating-point values
1871| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1872| for Binary Floating-Point Arithmetic.
1873*----------------------------------------------------------------------------*/
1874
1875float32 float32_sub( float32 a, float32 b STATUS_PARAM )
1876{
1877 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00001878 a = float32_squash_input_denormal(a STATUS_VAR);
1879 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001880
1881 aSign = extractFloat32Sign( a );
1882 bSign = extractFloat32Sign( b );
1883 if ( aSign == bSign ) {
1884 return subFloat32Sigs( a, b, aSign STATUS_VAR );
1885 }
1886 else {
1887 return addFloat32Sigs( a, b, aSign STATUS_VAR );
1888 }
1889
1890}
1891
1892/*----------------------------------------------------------------------------
1893| Returns the result of multiplying the single-precision floating-point values
1894| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
1895| for Binary Floating-Point Arithmetic.
1896*----------------------------------------------------------------------------*/
1897
1898float32 float32_mul( float32 a, float32 b STATUS_PARAM )
1899{
1900 flag aSign, bSign, zSign;
1901 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001902 uint32_t aSig, bSig;
1903 uint64_t zSig64;
1904 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00001905
Peter Maydell37d18662011-01-06 19:37:53 +00001906 a = float32_squash_input_denormal(a STATUS_VAR);
1907 b = float32_squash_input_denormal(b STATUS_VAR);
1908
bellard158142c2005-03-13 16:54:06 +00001909 aSig = extractFloat32Frac( a );
1910 aExp = extractFloat32Exp( a );
1911 aSign = extractFloat32Sign( a );
1912 bSig = extractFloat32Frac( b );
1913 bExp = extractFloat32Exp( b );
1914 bSign = extractFloat32Sign( b );
1915 zSign = aSign ^ bSign;
1916 if ( aExp == 0xFF ) {
1917 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1918 return propagateFloat32NaN( a, b STATUS_VAR );
1919 }
1920 if ( ( bExp | bSig ) == 0 ) {
1921 float_raise( float_flag_invalid STATUS_VAR);
1922 return float32_default_nan;
1923 }
1924 return packFloat32( zSign, 0xFF, 0 );
1925 }
1926 if ( bExp == 0xFF ) {
1927 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1928 if ( ( aExp | aSig ) == 0 ) {
1929 float_raise( float_flag_invalid STATUS_VAR);
1930 return float32_default_nan;
1931 }
1932 return packFloat32( zSign, 0xFF, 0 );
1933 }
1934 if ( aExp == 0 ) {
1935 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1936 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1937 }
1938 if ( bExp == 0 ) {
1939 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1940 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1941 }
1942 zExp = aExp + bExp - 0x7F;
1943 aSig = ( aSig | 0x00800000 )<<7;
1944 bSig = ( bSig | 0x00800000 )<<8;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001945 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard158142c2005-03-13 16:54:06 +00001946 zSig = zSig64;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001947 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00001948 zSig <<= 1;
1949 --zExp;
1950 }
1951 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1952
1953}
1954
1955/*----------------------------------------------------------------------------
1956| Returns the result of dividing the single-precision floating-point value `a'
1957| by the corresponding value `b'. The operation is performed according to the
1958| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1959*----------------------------------------------------------------------------*/
1960
1961float32 float32_div( float32 a, float32 b STATUS_PARAM )
1962{
1963 flag aSign, bSign, zSign;
1964 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001965 uint32_t aSig, bSig, zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00001966 a = float32_squash_input_denormal(a STATUS_VAR);
1967 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001968
1969 aSig = extractFloat32Frac( a );
1970 aExp = extractFloat32Exp( a );
1971 aSign = extractFloat32Sign( a );
1972 bSig = extractFloat32Frac( b );
1973 bExp = extractFloat32Exp( b );
1974 bSign = extractFloat32Sign( b );
1975 zSign = aSign ^ bSign;
1976 if ( aExp == 0xFF ) {
1977 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1978 if ( bExp == 0xFF ) {
1979 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1980 float_raise( float_flag_invalid STATUS_VAR);
1981 return float32_default_nan;
1982 }
1983 return packFloat32( zSign, 0xFF, 0 );
1984 }
1985 if ( bExp == 0xFF ) {
1986 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1987 return packFloat32( zSign, 0, 0 );
1988 }
1989 if ( bExp == 0 ) {
1990 if ( bSig == 0 ) {
1991 if ( ( aExp | aSig ) == 0 ) {
1992 float_raise( float_flag_invalid STATUS_VAR);
1993 return float32_default_nan;
1994 }
1995 float_raise( float_flag_divbyzero STATUS_VAR);
1996 return packFloat32( zSign, 0xFF, 0 );
1997 }
1998 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1999 }
2000 if ( aExp == 0 ) {
2001 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2002 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2003 }
2004 zExp = aExp - bExp + 0x7D;
2005 aSig = ( aSig | 0x00800000 )<<7;
2006 bSig = ( bSig | 0x00800000 )<<8;
2007 if ( bSig <= ( aSig + aSig ) ) {
2008 aSig >>= 1;
2009 ++zExp;
2010 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002011 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard158142c2005-03-13 16:54:06 +00002012 if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002013 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard158142c2005-03-13 16:54:06 +00002014 }
2015 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2016
2017}
2018
2019/*----------------------------------------------------------------------------
2020| Returns the remainder of the single-precision floating-point value `a'
2021| with respect to the corresponding value `b'. The operation is performed
2022| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2023*----------------------------------------------------------------------------*/
2024
2025float32 float32_rem( float32 a, float32 b STATUS_PARAM )
2026{
Blue Swirled086f32010-03-07 13:49:58 +00002027 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00002028 int16 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002029 uint32_t aSig, bSig;
2030 uint32_t q;
2031 uint64_t aSig64, bSig64, q64;
2032 uint32_t alternateASig;
2033 int32_t sigMean;
Peter Maydell37d18662011-01-06 19:37:53 +00002034 a = float32_squash_input_denormal(a STATUS_VAR);
2035 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002036
2037 aSig = extractFloat32Frac( a );
2038 aExp = extractFloat32Exp( a );
2039 aSign = extractFloat32Sign( a );
2040 bSig = extractFloat32Frac( b );
2041 bExp = extractFloat32Exp( b );
bellard158142c2005-03-13 16:54:06 +00002042 if ( aExp == 0xFF ) {
2043 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2044 return propagateFloat32NaN( a, b STATUS_VAR );
2045 }
2046 float_raise( float_flag_invalid STATUS_VAR);
2047 return float32_default_nan;
2048 }
2049 if ( bExp == 0xFF ) {
2050 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2051 return a;
2052 }
2053 if ( bExp == 0 ) {
2054 if ( bSig == 0 ) {
2055 float_raise( float_flag_invalid STATUS_VAR);
2056 return float32_default_nan;
2057 }
2058 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2059 }
2060 if ( aExp == 0 ) {
2061 if ( aSig == 0 ) return a;
2062 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2063 }
2064 expDiff = aExp - bExp;
2065 aSig |= 0x00800000;
2066 bSig |= 0x00800000;
2067 if ( expDiff < 32 ) {
2068 aSig <<= 8;
2069 bSig <<= 8;
2070 if ( expDiff < 0 ) {
2071 if ( expDiff < -1 ) return a;
2072 aSig >>= 1;
2073 }
2074 q = ( bSig <= aSig );
2075 if ( q ) aSig -= bSig;
2076 if ( 0 < expDiff ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002077 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard158142c2005-03-13 16:54:06 +00002078 q >>= 32 - expDiff;
2079 bSig >>= 2;
2080 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2081 }
2082 else {
2083 aSig >>= 2;
2084 bSig >>= 2;
2085 }
2086 }
2087 else {
2088 if ( bSig <= aSig ) aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002089 aSig64 = ( (uint64_t) aSig )<<40;
2090 bSig64 = ( (uint64_t) bSig )<<40;
bellard158142c2005-03-13 16:54:06 +00002091 expDiff -= 64;
2092 while ( 0 < expDiff ) {
2093 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2094 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2095 aSig64 = - ( ( bSig * q64 )<<38 );
2096 expDiff -= 62;
2097 }
2098 expDiff += 64;
2099 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2100 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2101 q = q64>>( 64 - expDiff );
2102 bSig <<= 6;
2103 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2104 }
2105 do {
2106 alternateASig = aSig;
2107 ++q;
2108 aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002109 } while ( 0 <= (int32_t) aSig );
bellard158142c2005-03-13 16:54:06 +00002110 sigMean = aSig + alternateASig;
2111 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2112 aSig = alternateASig;
2113 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002114 zSign = ( (int32_t) aSig < 0 );
bellard158142c2005-03-13 16:54:06 +00002115 if ( zSign ) aSig = - aSig;
2116 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2117
2118}
2119
2120/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00002121| Returns the result of multiplying the single-precision floating-point values
2122| `a' and `b' then adding 'c', with no intermediate rounding step after the
2123| multiplication. The operation is performed according to the IEC/IEEE
2124| Standard for Binary Floating-Point Arithmetic 754-2008.
2125| The flags argument allows the caller to select negation of the
2126| addend, the intermediate product, or the final result. (The difference
2127| between this and having the caller do a separate negation is that negating
2128| externally will flip the sign bit on NaNs.)
2129*----------------------------------------------------------------------------*/
2130
2131float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
2132{
2133 flag aSign, bSign, cSign, zSign;
2134 int aExp, bExp, cExp, pExp, zExp, expDiff;
2135 uint32_t aSig, bSig, cSig;
2136 flag pInf, pZero, pSign;
2137 uint64_t pSig64, cSig64, zSig64;
2138 uint32_t pSig;
2139 int shiftcount;
2140 flag signflip, infzero;
2141
2142 a = float32_squash_input_denormal(a STATUS_VAR);
2143 b = float32_squash_input_denormal(b STATUS_VAR);
2144 c = float32_squash_input_denormal(c STATUS_VAR);
2145 aSig = extractFloat32Frac(a);
2146 aExp = extractFloat32Exp(a);
2147 aSign = extractFloat32Sign(a);
2148 bSig = extractFloat32Frac(b);
2149 bExp = extractFloat32Exp(b);
2150 bSign = extractFloat32Sign(b);
2151 cSig = extractFloat32Frac(c);
2152 cExp = extractFloat32Exp(c);
2153 cSign = extractFloat32Sign(c);
2154
2155 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2156 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2157
2158 /* It is implementation-defined whether the cases of (0,inf,qnan)
2159 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
2160 * they return if they do), so we have to hand this information
2161 * off to the target-specific pick-a-NaN routine.
2162 */
2163 if (((aExp == 0xff) && aSig) ||
2164 ((bExp == 0xff) && bSig) ||
2165 ((cExp == 0xff) && cSig)) {
2166 return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
2167 }
2168
2169 if (infzero) {
2170 float_raise(float_flag_invalid STATUS_VAR);
2171 return float32_default_nan;
2172 }
2173
2174 if (flags & float_muladd_negate_c) {
2175 cSign ^= 1;
2176 }
2177
2178 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2179
2180 /* Work out the sign and type of the product */
2181 pSign = aSign ^ bSign;
2182 if (flags & float_muladd_negate_product) {
2183 pSign ^= 1;
2184 }
2185 pInf = (aExp == 0xff) || (bExp == 0xff);
2186 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2187
2188 if (cExp == 0xff) {
2189 if (pInf && (pSign ^ cSign)) {
2190 /* addition of opposite-signed infinities => InvalidOperation */
2191 float_raise(float_flag_invalid STATUS_VAR);
2192 return float32_default_nan;
2193 }
2194 /* Otherwise generate an infinity of the same sign */
2195 return packFloat32(cSign ^ signflip, 0xff, 0);
2196 }
2197
2198 if (pInf) {
2199 return packFloat32(pSign ^ signflip, 0xff, 0);
2200 }
2201
2202 if (pZero) {
2203 if (cExp == 0) {
2204 if (cSig == 0) {
2205 /* Adding two exact zeroes */
2206 if (pSign == cSign) {
2207 zSign = pSign;
2208 } else if (STATUS(float_rounding_mode) == float_round_down) {
2209 zSign = 1;
2210 } else {
2211 zSign = 0;
2212 }
2213 return packFloat32(zSign ^ signflip, 0, 0);
2214 }
2215 /* Exact zero plus a denorm */
2216 if (STATUS(flush_to_zero)) {
2217 float_raise(float_flag_output_denormal STATUS_VAR);
2218 return packFloat32(cSign ^ signflip, 0, 0);
2219 }
2220 }
2221 /* Zero plus something non-zero : just return the something */
2222 return c ^ (signflip << 31);
2223 }
2224
2225 if (aExp == 0) {
2226 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2227 }
2228 if (bExp == 0) {
2229 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2230 }
2231
2232 /* Calculate the actual result a * b + c */
2233
2234 /* Multiply first; this is easy. */
2235 /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
2236 * because we want the true exponent, not the "one-less-than"
2237 * flavour that roundAndPackFloat32() takes.
2238 */
2239 pExp = aExp + bExp - 0x7e;
2240 aSig = (aSig | 0x00800000) << 7;
2241 bSig = (bSig | 0x00800000) << 8;
2242 pSig64 = (uint64_t)aSig * bSig;
2243 if ((int64_t)(pSig64 << 1) >= 0) {
2244 pSig64 <<= 1;
2245 pExp--;
2246 }
2247
2248 zSign = pSign ^ signflip;
2249
2250 /* Now pSig64 is the significand of the multiply, with the explicit bit in
2251 * position 62.
2252 */
2253 if (cExp == 0) {
2254 if (!cSig) {
2255 /* Throw out the special case of c being an exact zero now */
2256 shift64RightJamming(pSig64, 32, &pSig64);
2257 pSig = pSig64;
2258 return roundAndPackFloat32(zSign, pExp - 1,
2259 pSig STATUS_VAR);
2260 }
2261 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2262 }
2263
2264 cSig64 = (uint64_t)cSig << (62 - 23);
2265 cSig64 |= LIT64(0x4000000000000000);
2266 expDiff = pExp - cExp;
2267
2268 if (pSign == cSign) {
2269 /* Addition */
2270 if (expDiff > 0) {
2271 /* scale c to match p */
2272 shift64RightJamming(cSig64, expDiff, &cSig64);
2273 zExp = pExp;
2274 } else if (expDiff < 0) {
2275 /* scale p to match c */
2276 shift64RightJamming(pSig64, -expDiff, &pSig64);
2277 zExp = cExp;
2278 } else {
2279 /* no scaling needed */
2280 zExp = cExp;
2281 }
2282 /* Add significands and make sure explicit bit ends up in posn 62 */
2283 zSig64 = pSig64 + cSig64;
2284 if ((int64_t)zSig64 < 0) {
2285 shift64RightJamming(zSig64, 1, &zSig64);
2286 } else {
2287 zExp--;
2288 }
2289 } else {
2290 /* Subtraction */
2291 if (expDiff > 0) {
2292 shift64RightJamming(cSig64, expDiff, &cSig64);
2293 zSig64 = pSig64 - cSig64;
2294 zExp = pExp;
2295 } else if (expDiff < 0) {
2296 shift64RightJamming(pSig64, -expDiff, &pSig64);
2297 zSig64 = cSig64 - pSig64;
2298 zExp = cExp;
2299 zSign ^= 1;
2300 } else {
2301 zExp = pExp;
2302 if (cSig64 < pSig64) {
2303 zSig64 = pSig64 - cSig64;
2304 } else if (pSig64 < cSig64) {
2305 zSig64 = cSig64 - pSig64;
2306 zSign ^= 1;
2307 } else {
2308 /* Exact zero */
2309 zSign = signflip;
2310 if (STATUS(float_rounding_mode) == float_round_down) {
2311 zSign ^= 1;
2312 }
2313 return packFloat32(zSign, 0, 0);
2314 }
2315 }
2316 --zExp;
2317 /* Normalize to put the explicit bit back into bit 62. */
2318 shiftcount = countLeadingZeros64(zSig64) - 1;
2319 zSig64 <<= shiftcount;
2320 zExp -= shiftcount;
2321 }
2322 shift64RightJamming(zSig64, 32, &zSig64);
2323 return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
2324}
2325
2326
2327/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002328| Returns the square root of the single-precision floating-point value `a'.
2329| The operation is performed according to the IEC/IEEE Standard for Binary
2330| Floating-Point Arithmetic.
2331*----------------------------------------------------------------------------*/
2332
2333float32 float32_sqrt( float32 a STATUS_PARAM )
2334{
2335 flag aSign;
2336 int16 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002337 uint32_t aSig, zSig;
2338 uint64_t rem, term;
Peter Maydell37d18662011-01-06 19:37:53 +00002339 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002340
2341 aSig = extractFloat32Frac( a );
2342 aExp = extractFloat32Exp( a );
2343 aSign = extractFloat32Sign( a );
2344 if ( aExp == 0xFF ) {
pbrookf090c9d2007-11-18 14:33:24 +00002345 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00002346 if ( ! aSign ) return a;
2347 float_raise( float_flag_invalid STATUS_VAR);
2348 return float32_default_nan;
2349 }
2350 if ( aSign ) {
2351 if ( ( aExp | aSig ) == 0 ) return a;
2352 float_raise( float_flag_invalid STATUS_VAR);
2353 return float32_default_nan;
2354 }
2355 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00002356 if ( aSig == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00002357 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2358 }
2359 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2360 aSig = ( aSig | 0x00800000 )<<8;
2361 zSig = estimateSqrt32( aExp, aSig ) + 2;
2362 if ( ( zSig & 0x7F ) <= 5 ) {
2363 if ( zSig < 2 ) {
2364 zSig = 0x7FFFFFFF;
2365 goto roundAndPack;
2366 }
2367 aSig >>= aExp & 1;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002368 term = ( (uint64_t) zSig ) * zSig;
2369 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2370 while ( (int64_t) rem < 0 ) {
bellard158142c2005-03-13 16:54:06 +00002371 --zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002372 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
bellard158142c2005-03-13 16:54:06 +00002373 }
2374 zSig |= ( rem != 0 );
2375 }
2376 shift32RightJamming( zSig, 1, &zSig );
2377 roundAndPack:
2378 return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2379
2380}
2381
2382/*----------------------------------------------------------------------------
Aurelien Jarno8229c992009-02-05 12:04:05 +01002383| Returns the binary exponential of the single-precision floating-point value
2384| `a'. The operation is performed according to the IEC/IEEE Standard for
2385| Binary Floating-Point Arithmetic.
2386|
2387| Uses the following identities:
2388|
2389| 1. -------------------------------------------------------------------------
2390| x x*ln(2)
2391| 2 = e
2392|
2393| 2. -------------------------------------------------------------------------
2394| 2 3 4 5 n
2395| x x x x x x x
2396| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2397| 1! 2! 3! 4! 5! n!
2398*----------------------------------------------------------------------------*/
2399
2400static const float64 float32_exp2_coefficients[15] =
2401{
Peter Maydelld5138cf2011-02-10 13:59:34 +00002402 const_float64( 0x3ff0000000000000ll ), /* 1 */
2403 const_float64( 0x3fe0000000000000ll ), /* 2 */
2404 const_float64( 0x3fc5555555555555ll ), /* 3 */
2405 const_float64( 0x3fa5555555555555ll ), /* 4 */
2406 const_float64( 0x3f81111111111111ll ), /* 5 */
2407 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
2408 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
2409 const_float64( 0x3efa01a01a01a01all ), /* 8 */
2410 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
2411 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2412 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2413 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2414 const_float64( 0x3de6124613a86d09ll ), /* 13 */
2415 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2416 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno8229c992009-02-05 12:04:05 +01002417};
2418
2419float32 float32_exp2( float32 a STATUS_PARAM )
2420{
2421 flag aSign;
2422 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002423 uint32_t aSig;
Aurelien Jarno8229c992009-02-05 12:04:05 +01002424 float64 r, x, xn;
2425 int i;
Peter Maydell37d18662011-01-06 19:37:53 +00002426 a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno8229c992009-02-05 12:04:05 +01002427
2428 aSig = extractFloat32Frac( a );
2429 aExp = extractFloat32Exp( a );
2430 aSign = extractFloat32Sign( a );
2431
2432 if ( aExp == 0xFF) {
2433 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2434 return (aSign) ? float32_zero : a;
2435 }
2436 if (aExp == 0) {
2437 if (aSig == 0) return float32_one;
2438 }
2439
2440 float_raise( float_flag_inexact STATUS_VAR);
2441
2442 /* ******************************* */
2443 /* using float64 for approximation */
2444 /* ******************************* */
2445 x = float32_to_float64(a STATUS_VAR);
2446 x = float64_mul(x, float64_ln2 STATUS_VAR);
2447
2448 xn = x;
2449 r = float64_one;
2450 for (i = 0 ; i < 15 ; i++) {
2451 float64 f;
2452
2453 f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2454 r = float64_add(r, f STATUS_VAR);
2455
2456 xn = float64_mul(xn, x STATUS_VAR);
2457 }
2458
2459 return float64_to_float32(r, status);
2460}
2461
2462/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00002463| Returns the binary log of the single-precision floating-point value `a'.
2464| The operation is performed according to the IEC/IEEE Standard for Binary
2465| Floating-Point Arithmetic.
2466*----------------------------------------------------------------------------*/
2467float32 float32_log2( float32 a STATUS_PARAM )
2468{
2469 flag aSign, zSign;
2470 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002471 uint32_t aSig, zSig, i;
aurel32374dfc32009-02-05 13:42:47 +00002472
Peter Maydell37d18662011-01-06 19:37:53 +00002473 a = float32_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00002474 aSig = extractFloat32Frac( a );
2475 aExp = extractFloat32Exp( a );
2476 aSign = extractFloat32Sign( a );
2477
2478 if ( aExp == 0 ) {
2479 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2480 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2481 }
2482 if ( aSign ) {
2483 float_raise( float_flag_invalid STATUS_VAR);
2484 return float32_default_nan;
2485 }
2486 if ( aExp == 0xFF ) {
2487 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2488 return a;
2489 }
2490
2491 aExp -= 0x7F;
2492 aSig |= 0x00800000;
2493 zSign = aExp < 0;
2494 zSig = aExp << 23;
2495
2496 for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002497 aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32374dfc32009-02-05 13:42:47 +00002498 if ( aSig & 0x01000000 ) {
2499 aSig >>= 1;
2500 zSig |= i;
2501 }
2502 }
2503
2504 if ( zSign )
2505 zSig = -zSig;
2506
2507 return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2508}
2509
2510/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002511| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02002512| the corresponding value `b', and 0 otherwise. The invalid exception is
2513| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00002514| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2515*----------------------------------------------------------------------------*/
2516
Aurelien Jarnob6893622011-04-14 00:49:29 +02002517int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002518{
Aurelien Jarnob6893622011-04-14 00:49:29 +02002519 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002520 a = float32_squash_input_denormal(a STATUS_VAR);
2521 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002522
2523 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2524 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2525 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002526 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002527 return 0;
2528 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002529 av = float32_val(a);
2530 bv = float32_val(b);
2531 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002532}
2533
2534/*----------------------------------------------------------------------------
2535| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002536| or equal to the corresponding value `b', and 0 otherwise. The invalid
2537| exception is raised if either operand is a NaN. The comparison is performed
2538| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002539*----------------------------------------------------------------------------*/
2540
bellard750afe92006-10-28 19:27:11 +00002541int float32_le( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002542{
2543 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002544 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002545 a = float32_squash_input_denormal(a STATUS_VAR);
2546 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002547
2548 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2549 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2550 ) {
2551 float_raise( float_flag_invalid STATUS_VAR);
2552 return 0;
2553 }
2554 aSign = extractFloat32Sign( a );
2555 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002556 av = float32_val(a);
2557 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002558 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002559 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002560
2561}
2562
2563/*----------------------------------------------------------------------------
2564| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002565| the corresponding value `b', and 0 otherwise. The invalid exception is
2566| raised if either operand is a NaN. The comparison is performed according
2567| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002568*----------------------------------------------------------------------------*/
2569
bellard750afe92006-10-28 19:27:11 +00002570int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002571{
2572 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002573 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002574 a = float32_squash_input_denormal(a STATUS_VAR);
2575 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002576
2577 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2578 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2579 ) {
2580 float_raise( float_flag_invalid STATUS_VAR);
2581 return 0;
2582 }
2583 aSign = extractFloat32Sign( a );
2584 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002585 av = float32_val(a);
2586 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002587 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002588 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002589
2590}
2591
2592/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002593| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002594| be compared, and 0 otherwise. The invalid exception is raised if either
2595| operand is a NaN. The comparison is performed according to the IEC/IEEE
2596| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02002597*----------------------------------------------------------------------------*/
2598
2599int float32_unordered( float32 a, float32 b STATUS_PARAM )
2600{
2601 a = float32_squash_input_denormal(a STATUS_VAR);
2602 b = float32_squash_input_denormal(b STATUS_VAR);
2603
2604 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2605 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2606 ) {
2607 float_raise( float_flag_invalid STATUS_VAR);
2608 return 1;
2609 }
2610 return 0;
2611}
Aurelien Jarnob6893622011-04-14 00:49:29 +02002612
Aurelien Jarno67b78612011-04-14 00:49:29 +02002613/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002614| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002615| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2616| exception. The comparison is performed according to the IEC/IEEE Standard
2617| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002618*----------------------------------------------------------------------------*/
2619
Aurelien Jarnob6893622011-04-14 00:49:29 +02002620int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002621{
Peter Maydell37d18662011-01-06 19:37:53 +00002622 a = float32_squash_input_denormal(a STATUS_VAR);
2623 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002624
2625 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2626 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2627 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002628 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2629 float_raise( float_flag_invalid STATUS_VAR);
2630 }
bellard158142c2005-03-13 16:54:06 +00002631 return 0;
2632 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002633 return ( float32_val(a) == float32_val(b) ) ||
2634 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002635}
2636
2637/*----------------------------------------------------------------------------
2638| Returns 1 if the single-precision floating-point value `a' is less than or
2639| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2640| cause an exception. Otherwise, the comparison is performed according to the
2641| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2642*----------------------------------------------------------------------------*/
2643
bellard750afe92006-10-28 19:27:11 +00002644int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002645{
2646 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002647 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002648 a = float32_squash_input_denormal(a STATUS_VAR);
2649 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002650
2651 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2652 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2653 ) {
2654 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2655 float_raise( float_flag_invalid STATUS_VAR);
2656 }
2657 return 0;
2658 }
2659 aSign = extractFloat32Sign( a );
2660 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002661 av = float32_val(a);
2662 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002663 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002664 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002665
2666}
2667
2668/*----------------------------------------------------------------------------
2669| Returns 1 if the single-precision floating-point value `a' is less than
2670| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2671| exception. Otherwise, the comparison is performed according to the IEC/IEEE
2672| Standard for Binary Floating-Point Arithmetic.
2673*----------------------------------------------------------------------------*/
2674
bellard750afe92006-10-28 19:27:11 +00002675int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002676{
2677 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002678 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002679 a = float32_squash_input_denormal(a STATUS_VAR);
2680 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002681
2682 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2683 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2684 ) {
2685 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2686 float_raise( float_flag_invalid STATUS_VAR);
2687 }
2688 return 0;
2689 }
2690 aSign = extractFloat32Sign( a );
2691 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002692 av = float32_val(a);
2693 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002694 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002695 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002696
2697}
2698
2699/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002700| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2701| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
2702| comparison is performed according to the IEC/IEEE Standard for Binary
2703| Floating-Point Arithmetic.
2704*----------------------------------------------------------------------------*/
2705
2706int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2707{
2708 a = float32_squash_input_denormal(a STATUS_VAR);
2709 b = float32_squash_input_denormal(b STATUS_VAR);
2710
2711 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2712 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2713 ) {
2714 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2715 float_raise( float_flag_invalid STATUS_VAR);
2716 }
2717 return 1;
2718 }
2719 return 0;
2720}
2721
2722/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002723| Returns the result of converting the double-precision floating-point value
2724| `a' to the 32-bit two's complement integer format. The conversion is
2725| performed according to the IEC/IEEE Standard for Binary Floating-Point
2726| Arithmetic---which means in particular that the conversion is rounded
2727| according to the current rounding mode. If `a' is a NaN, the largest
2728| positive integer is returned. Otherwise, if the conversion overflows, the
2729| largest integer with the same sign as `a' is returned.
2730*----------------------------------------------------------------------------*/
2731
2732int32 float64_to_int32( float64 a STATUS_PARAM )
2733{
2734 flag aSign;
2735 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002736 uint64_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002737 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002738
2739 aSig = extractFloat64Frac( a );
2740 aExp = extractFloat64Exp( a );
2741 aSign = extractFloat64Sign( a );
2742 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2743 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2744 shiftCount = 0x42C - aExp;
2745 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2746 return roundAndPackInt32( aSign, aSig STATUS_VAR );
2747
2748}
2749
2750/*----------------------------------------------------------------------------
2751| Returns the result of converting the double-precision floating-point value
2752| `a' to the 32-bit two's complement integer format. The conversion is
2753| performed according to the IEC/IEEE Standard for Binary Floating-Point
2754| Arithmetic, except that the conversion is always rounded toward zero.
2755| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2756| the conversion overflows, the largest integer with the same sign as `a' is
2757| returned.
2758*----------------------------------------------------------------------------*/
2759
2760int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2761{
2762 flag aSign;
2763 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002764 uint64_t aSig, savedASig;
bellard158142c2005-03-13 16:54:06 +00002765 int32 z;
Peter Maydell37d18662011-01-06 19:37:53 +00002766 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002767
2768 aSig = extractFloat64Frac( a );
2769 aExp = extractFloat64Exp( a );
2770 aSign = extractFloat64Sign( a );
2771 if ( 0x41E < aExp ) {
2772 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2773 goto invalid;
2774 }
2775 else if ( aExp < 0x3FF ) {
2776 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2777 return 0;
2778 }
2779 aSig |= LIT64( 0x0010000000000000 );
2780 shiftCount = 0x433 - aExp;
2781 savedASig = aSig;
2782 aSig >>= shiftCount;
2783 z = aSig;
2784 if ( aSign ) z = - z;
2785 if ( ( z < 0 ) ^ aSign ) {
2786 invalid:
2787 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002788 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00002789 }
2790 if ( ( aSig<<shiftCount ) != savedASig ) {
2791 STATUS(float_exception_flags) |= float_flag_inexact;
2792 }
2793 return z;
2794
2795}
2796
2797/*----------------------------------------------------------------------------
2798| Returns the result of converting the double-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00002799| `a' to the 16-bit two's complement integer format. The conversion is
2800| performed according to the IEC/IEEE Standard for Binary Floating-Point
2801| Arithmetic, except that the conversion is always rounded toward zero.
2802| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2803| the conversion overflows, the largest integer with the same sign as `a' is
2804| returned.
2805*----------------------------------------------------------------------------*/
2806
2807int16 float64_to_int16_round_to_zero( float64 a STATUS_PARAM )
2808{
2809 flag aSign;
2810 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002811 uint64_t aSig, savedASig;
Peter Maydellcbcef452010-12-07 15:37:34 +00002812 int32 z;
2813
2814 aSig = extractFloat64Frac( a );
2815 aExp = extractFloat64Exp( a );
2816 aSign = extractFloat64Sign( a );
2817 if ( 0x40E < aExp ) {
2818 if ( ( aExp == 0x7FF ) && aSig ) {
2819 aSign = 0;
2820 }
2821 goto invalid;
2822 }
2823 else if ( aExp < 0x3FF ) {
2824 if ( aExp || aSig ) {
2825 STATUS(float_exception_flags) |= float_flag_inexact;
2826 }
2827 return 0;
2828 }
2829 aSig |= LIT64( 0x0010000000000000 );
2830 shiftCount = 0x433 - aExp;
2831 savedASig = aSig;
2832 aSig >>= shiftCount;
2833 z = aSig;
2834 if ( aSign ) {
2835 z = - z;
2836 }
2837 if ( ( (int16_t)z < 0 ) ^ aSign ) {
2838 invalid:
2839 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002840 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydellcbcef452010-12-07 15:37:34 +00002841 }
2842 if ( ( aSig<<shiftCount ) != savedASig ) {
2843 STATUS(float_exception_flags) |= float_flag_inexact;
2844 }
2845 return z;
2846}
2847
2848/*----------------------------------------------------------------------------
2849| Returns the result of converting the double-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00002850| `a' to the 64-bit two's complement integer format. The conversion is
2851| performed according to the IEC/IEEE Standard for Binary Floating-Point
2852| Arithmetic---which means in particular that the conversion is rounded
2853| according to the current rounding mode. If `a' is a NaN, the largest
2854| positive integer is returned. Otherwise, if the conversion overflows, the
2855| largest integer with the same sign as `a' is returned.
2856*----------------------------------------------------------------------------*/
2857
2858int64 float64_to_int64( float64 a STATUS_PARAM )
2859{
2860 flag aSign;
2861 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002862 uint64_t aSig, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00002863 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002864
2865 aSig = extractFloat64Frac( a );
2866 aExp = extractFloat64Exp( a );
2867 aSign = extractFloat64Sign( a );
2868 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2869 shiftCount = 0x433 - aExp;
2870 if ( shiftCount <= 0 ) {
2871 if ( 0x43E < aExp ) {
2872 float_raise( float_flag_invalid STATUS_VAR);
2873 if ( ! aSign
2874 || ( ( aExp == 0x7FF )
2875 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2876 ) {
2877 return LIT64( 0x7FFFFFFFFFFFFFFF );
2878 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002879 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00002880 }
2881 aSigExtra = 0;
2882 aSig <<= - shiftCount;
2883 }
2884 else {
2885 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2886 }
2887 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
2888
2889}
2890
2891/*----------------------------------------------------------------------------
2892| Returns the result of converting the double-precision floating-point value
2893| `a' to the 64-bit two's complement integer format. The conversion is
2894| performed according to the IEC/IEEE Standard for Binary Floating-Point
2895| Arithmetic, except that the conversion is always rounded toward zero.
2896| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2897| the conversion overflows, the largest integer with the same sign as `a' is
2898| returned.
2899*----------------------------------------------------------------------------*/
2900
2901int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
2902{
2903 flag aSign;
2904 int16 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002905 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00002906 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00002907 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002908
2909 aSig = extractFloat64Frac( a );
2910 aExp = extractFloat64Exp( a );
2911 aSign = extractFloat64Sign( a );
2912 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2913 shiftCount = aExp - 0x433;
2914 if ( 0 <= shiftCount ) {
2915 if ( 0x43E <= aExp ) {
pbrookf090c9d2007-11-18 14:33:24 +00002916 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00002917 float_raise( float_flag_invalid STATUS_VAR);
2918 if ( ! aSign
2919 || ( ( aExp == 0x7FF )
2920 && ( aSig != LIT64( 0x0010000000000000 ) ) )
2921 ) {
2922 return LIT64( 0x7FFFFFFFFFFFFFFF );
2923 }
2924 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002925 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00002926 }
2927 z = aSig<<shiftCount;
2928 }
2929 else {
2930 if ( aExp < 0x3FE ) {
2931 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2932 return 0;
2933 }
2934 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01002935 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00002936 STATUS(float_exception_flags) |= float_flag_inexact;
2937 }
2938 }
2939 if ( aSign ) z = - z;
2940 return z;
2941
2942}
2943
2944/*----------------------------------------------------------------------------
2945| Returns the result of converting the double-precision floating-point value
2946| `a' to the single-precision floating-point format. The conversion is
2947| performed according to the IEC/IEEE Standard for Binary Floating-Point
2948| Arithmetic.
2949*----------------------------------------------------------------------------*/
2950
2951float32 float64_to_float32( float64 a STATUS_PARAM )
2952{
2953 flag aSign;
2954 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002955 uint64_t aSig;
2956 uint32_t zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002957 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002958
2959 aSig = extractFloat64Frac( a );
2960 aExp = extractFloat64Exp( a );
2961 aSign = extractFloat64Sign( a );
2962 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00002963 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00002964 return packFloat32( aSign, 0xFF, 0 );
2965 }
2966 shift64RightJamming( aSig, 22, &aSig );
2967 zSig = aSig;
2968 if ( aExp || zSig ) {
2969 zSig |= 0x40000000;
2970 aExp -= 0x381;
2971 }
2972 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
2973
2974}
2975
Paul Brook60011492009-11-19 16:45:20 +00002976
2977/*----------------------------------------------------------------------------
2978| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2979| half-precision floating-point value, returning the result. After being
2980| shifted into the proper positions, the three fields are simply added
2981| together to form the result. This means that any integer portion of `zSig'
2982| will be added into the exponent. Since a properly normalized significand
2983| will have an integer portion equal to 1, the `zExp' input should be 1 less
2984| than the desired result exponent whenever `zSig' is a complete, normalized
2985| significand.
2986*----------------------------------------------------------------------------*/
Andreas Färberbb98fe42011-03-07 01:34:06 +01002987static float16 packFloat16(flag zSign, int16 zExp, uint16_t zSig)
Paul Brook60011492009-11-19 16:45:20 +00002988{
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00002989 return make_float16(
Andreas Färberbb98fe42011-03-07 01:34:06 +01002990 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook60011492009-11-19 16:45:20 +00002991}
2992
2993/* Half precision floats come in two formats: standard IEEE and "ARM" format.
2994 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00002995
2996float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00002997{
2998 flag aSign;
2999 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003000 uint32_t aSig;
Paul Brook60011492009-11-19 16:45:20 +00003001
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003002 aSign = extractFloat16Sign(a);
3003 aExp = extractFloat16Exp(a);
3004 aSig = extractFloat16Frac(a);
Paul Brook60011492009-11-19 16:45:20 +00003005
3006 if (aExp == 0x1f && ieee) {
3007 if (aSig) {
Peter Maydellf591e1b2011-02-10 11:28:59 +00003008 return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003009 }
3010 return packFloat32(aSign, 0xff, aSig << 13);
3011 }
3012 if (aExp == 0) {
3013 int8 shiftCount;
3014
3015 if (aSig == 0) {
3016 return packFloat32(aSign, 0, 0);
3017 }
3018
3019 shiftCount = countLeadingZeros32( aSig ) - 21;
3020 aSig = aSig << shiftCount;
3021 aExp = -shiftCount;
3022 }
3023 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3024}
3025
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003026float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00003027{
3028 flag aSign;
3029 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003030 uint32_t aSig;
3031 uint32_t mask;
3032 uint32_t increment;
Paul Brook60011492009-11-19 16:45:20 +00003033 int8 roundingMode;
Peter Maydell37d18662011-01-06 19:37:53 +00003034 a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003035
3036 aSig = extractFloat32Frac( a );
3037 aExp = extractFloat32Exp( a );
3038 aSign = extractFloat32Sign( a );
3039 if ( aExp == 0xFF ) {
3040 if (aSig) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003041 /* Input is a NaN */
3042 float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
3043 if (!ieee) {
3044 return packFloat16(aSign, 0, 0);
3045 }
3046 return r;
Paul Brook60011492009-11-19 16:45:20 +00003047 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003048 /* Infinity */
3049 if (!ieee) {
3050 float_raise(float_flag_invalid STATUS_VAR);
3051 return packFloat16(aSign, 0x1f, 0x3ff);
3052 }
3053 return packFloat16(aSign, 0x1f, 0);
Paul Brook60011492009-11-19 16:45:20 +00003054 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003055 if (aExp == 0 && aSig == 0) {
Paul Brook60011492009-11-19 16:45:20 +00003056 return packFloat16(aSign, 0, 0);
3057 }
3058 /* Decimal point between bits 22 and 23. */
3059 aSig |= 0x00800000;
3060 aExp -= 0x7f;
3061 if (aExp < -14) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003062 mask = 0x00ffffff;
3063 if (aExp >= -24) {
3064 mask >>= 25 + aExp;
Paul Brook60011492009-11-19 16:45:20 +00003065 }
3066 } else {
3067 mask = 0x00001fff;
3068 }
3069 if (aSig & mask) {
3070 float_raise( float_flag_underflow STATUS_VAR );
3071 roundingMode = STATUS(float_rounding_mode);
3072 switch (roundingMode) {
3073 case float_round_nearest_even:
3074 increment = (mask + 1) >> 1;
3075 if ((aSig & mask) == increment) {
3076 increment = aSig & (increment << 1);
3077 }
3078 break;
3079 case float_round_up:
3080 increment = aSign ? 0 : mask;
3081 break;
3082 case float_round_down:
3083 increment = aSign ? mask : 0;
3084 break;
3085 default: /* round_to_zero */
3086 increment = 0;
3087 break;
3088 }
3089 aSig += increment;
3090 if (aSig >= 0x01000000) {
3091 aSig >>= 1;
3092 aExp++;
3093 }
3094 } else if (aExp < -14
3095 && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
3096 float_raise( float_flag_underflow STATUS_VAR);
3097 }
3098
3099 if (ieee) {
3100 if (aExp > 15) {
3101 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
3102 return packFloat16(aSign, 0x1f, 0);
3103 }
3104 } else {
3105 if (aExp > 16) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003106 float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003107 return packFloat16(aSign, 0x1f, 0x3ff);
3108 }
3109 }
3110 if (aExp < -24) {
3111 return packFloat16(aSign, 0, 0);
3112 }
3113 if (aExp < -14) {
3114 aSig >>= -14 - aExp;
3115 aExp = -14;
3116 }
3117 return packFloat16(aSign, aExp + 14, aSig >> 13);
3118}
3119
bellard158142c2005-03-13 16:54:06 +00003120/*----------------------------------------------------------------------------
3121| Returns the result of converting the double-precision floating-point value
3122| `a' to the extended double-precision floating-point format. The conversion
3123| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3124| Arithmetic.
3125*----------------------------------------------------------------------------*/
3126
3127floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
3128{
3129 flag aSign;
3130 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003131 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00003132
Peter Maydell37d18662011-01-06 19:37:53 +00003133 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003134 aSig = extractFloat64Frac( a );
3135 aExp = extractFloat64Exp( a );
3136 aSign = extractFloat64Sign( a );
3137 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003138 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003139 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3140 }
3141 if ( aExp == 0 ) {
3142 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3143 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3144 }
3145 return
3146 packFloatx80(
3147 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3148
3149}
3150
bellard158142c2005-03-13 16:54:06 +00003151/*----------------------------------------------------------------------------
3152| Returns the result of converting the double-precision floating-point value
3153| `a' to the quadruple-precision floating-point format. The conversion is
3154| performed according to the IEC/IEEE Standard for Binary Floating-Point
3155| Arithmetic.
3156*----------------------------------------------------------------------------*/
3157
3158float128 float64_to_float128( float64 a STATUS_PARAM )
3159{
3160 flag aSign;
3161 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003162 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003163
Peter Maydell37d18662011-01-06 19:37:53 +00003164 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003165 aSig = extractFloat64Frac( a );
3166 aExp = extractFloat64Exp( a );
3167 aSign = extractFloat64Sign( a );
3168 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003169 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003170 return packFloat128( aSign, 0x7FFF, 0, 0 );
3171 }
3172 if ( aExp == 0 ) {
3173 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3174 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3175 --aExp;
3176 }
3177 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3178 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3179
3180}
3181
bellard158142c2005-03-13 16:54:06 +00003182/*----------------------------------------------------------------------------
3183| Rounds the double-precision floating-point value `a' to an integer, and
3184| returns the result as a double-precision floating-point value. The
3185| operation is performed according to the IEC/IEEE Standard for Binary
3186| Floating-Point Arithmetic.
3187*----------------------------------------------------------------------------*/
3188
3189float64 float64_round_to_int( float64 a STATUS_PARAM )
3190{
3191 flag aSign;
3192 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003193 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00003194 int8 roundingMode;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003195 uint64_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00003196 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003197
3198 aExp = extractFloat64Exp( a );
3199 if ( 0x433 <= aExp ) {
3200 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3201 return propagateFloat64NaN( a, a STATUS_VAR );
3202 }
3203 return a;
3204 }
3205 if ( aExp < 0x3FF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01003206 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00003207 STATUS(float_exception_flags) |= float_flag_inexact;
3208 aSign = extractFloat64Sign( a );
3209 switch ( STATUS(float_rounding_mode) ) {
3210 case float_round_nearest_even:
3211 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3212 return packFloat64( aSign, 0x3FF, 0 );
3213 }
3214 break;
3215 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00003216 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard158142c2005-03-13 16:54:06 +00003217 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00003218 return make_float64(
3219 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard158142c2005-03-13 16:54:06 +00003220 }
3221 return packFloat64( aSign, 0, 0 );
3222 }
3223 lastBitMask = 1;
3224 lastBitMask <<= 0x433 - aExp;
3225 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00003226 z = float64_val(a);
bellard158142c2005-03-13 16:54:06 +00003227 roundingMode = STATUS(float_rounding_mode);
3228 if ( roundingMode == float_round_nearest_even ) {
3229 z += lastBitMask>>1;
3230 if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
3231 }
3232 else if ( roundingMode != float_round_to_zero ) {
pbrookf090c9d2007-11-18 14:33:24 +00003233 if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
bellard158142c2005-03-13 16:54:06 +00003234 z += roundBitsMask;
3235 }
3236 }
3237 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00003238 if ( z != float64_val(a) )
3239 STATUS(float_exception_flags) |= float_flag_inexact;
3240 return make_float64(z);
bellard158142c2005-03-13 16:54:06 +00003241
3242}
3243
pbrooke6e59062006-10-22 00:18:54 +00003244float64 float64_trunc_to_int( float64 a STATUS_PARAM)
3245{
3246 int oldmode;
3247 float64 res;
3248 oldmode = STATUS(float_rounding_mode);
3249 STATUS(float_rounding_mode) = float_round_to_zero;
3250 res = float64_round_to_int(a STATUS_VAR);
3251 STATUS(float_rounding_mode) = oldmode;
3252 return res;
3253}
3254
bellard158142c2005-03-13 16:54:06 +00003255/*----------------------------------------------------------------------------
3256| Returns the result of adding the absolute values of the double-precision
3257| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
3258| before being returned. `zSign' is ignored if the result is a NaN.
3259| The addition is performed according to the IEC/IEEE Standard for Binary
3260| Floating-Point Arithmetic.
3261*----------------------------------------------------------------------------*/
3262
3263static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3264{
3265 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003266 uint64_t aSig, bSig, zSig;
bellard158142c2005-03-13 16:54:06 +00003267 int16 expDiff;
3268
3269 aSig = extractFloat64Frac( a );
3270 aExp = extractFloat64Exp( a );
3271 bSig = extractFloat64Frac( b );
3272 bExp = extractFloat64Exp( b );
3273 expDiff = aExp - bExp;
3274 aSig <<= 9;
3275 bSig <<= 9;
3276 if ( 0 < expDiff ) {
3277 if ( aExp == 0x7FF ) {
3278 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3279 return a;
3280 }
3281 if ( bExp == 0 ) {
3282 --expDiff;
3283 }
3284 else {
3285 bSig |= LIT64( 0x2000000000000000 );
3286 }
3287 shift64RightJamming( bSig, expDiff, &bSig );
3288 zExp = aExp;
3289 }
3290 else if ( expDiff < 0 ) {
3291 if ( bExp == 0x7FF ) {
3292 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3293 return packFloat64( zSign, 0x7FF, 0 );
3294 }
3295 if ( aExp == 0 ) {
3296 ++expDiff;
3297 }
3298 else {
3299 aSig |= LIT64( 0x2000000000000000 );
3300 }
3301 shift64RightJamming( aSig, - expDiff, &aSig );
3302 zExp = bExp;
3303 }
3304 else {
3305 if ( aExp == 0x7FF ) {
3306 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3307 return a;
3308 }
pbrookfe76d972008-12-19 14:33:59 +00003309 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01003310 if (STATUS(flush_to_zero)) {
3311 if (aSig | bSig) {
3312 float_raise(float_flag_output_denormal STATUS_VAR);
3313 }
3314 return packFloat64(zSign, 0, 0);
3315 }
pbrookfe76d972008-12-19 14:33:59 +00003316 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3317 }
bellard158142c2005-03-13 16:54:06 +00003318 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3319 zExp = aExp;
3320 goto roundAndPack;
3321 }
3322 aSig |= LIT64( 0x2000000000000000 );
3323 zSig = ( aSig + bSig )<<1;
3324 --zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003325 if ( (int64_t) zSig < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003326 zSig = aSig + bSig;
3327 ++zExp;
3328 }
3329 roundAndPack:
3330 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3331
3332}
3333
3334/*----------------------------------------------------------------------------
3335| Returns the result of subtracting the absolute values of the double-
3336| precision floating-point values `a' and `b'. If `zSign' is 1, the
3337| difference is negated before being returned. `zSign' is ignored if the
3338| result is a NaN. The subtraction is performed according to the IEC/IEEE
3339| Standard for Binary Floating-Point Arithmetic.
3340*----------------------------------------------------------------------------*/
3341
3342static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3343{
3344 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003345 uint64_t aSig, bSig, zSig;
bellard158142c2005-03-13 16:54:06 +00003346 int16 expDiff;
3347
3348 aSig = extractFloat64Frac( a );
3349 aExp = extractFloat64Exp( a );
3350 bSig = extractFloat64Frac( b );
3351 bExp = extractFloat64Exp( b );
3352 expDiff = aExp - bExp;
3353 aSig <<= 10;
3354 bSig <<= 10;
3355 if ( 0 < expDiff ) goto aExpBigger;
3356 if ( expDiff < 0 ) goto bExpBigger;
3357 if ( aExp == 0x7FF ) {
3358 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3359 float_raise( float_flag_invalid STATUS_VAR);
3360 return float64_default_nan;
3361 }
3362 if ( aExp == 0 ) {
3363 aExp = 1;
3364 bExp = 1;
3365 }
3366 if ( bSig < aSig ) goto aBigger;
3367 if ( aSig < bSig ) goto bBigger;
3368 return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3369 bExpBigger:
3370 if ( bExp == 0x7FF ) {
3371 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3372 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3373 }
3374 if ( aExp == 0 ) {
3375 ++expDiff;
3376 }
3377 else {
3378 aSig |= LIT64( 0x4000000000000000 );
3379 }
3380 shift64RightJamming( aSig, - expDiff, &aSig );
3381 bSig |= LIT64( 0x4000000000000000 );
3382 bBigger:
3383 zSig = bSig - aSig;
3384 zExp = bExp;
3385 zSign ^= 1;
3386 goto normalizeRoundAndPack;
3387 aExpBigger:
3388 if ( aExp == 0x7FF ) {
3389 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3390 return a;
3391 }
3392 if ( bExp == 0 ) {
3393 --expDiff;
3394 }
3395 else {
3396 bSig |= LIT64( 0x4000000000000000 );
3397 }
3398 shift64RightJamming( bSig, expDiff, &bSig );
3399 aSig |= LIT64( 0x4000000000000000 );
3400 aBigger:
3401 zSig = aSig - bSig;
3402 zExp = aExp;
3403 normalizeRoundAndPack:
3404 --zExp;
3405 return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3406
3407}
3408
3409/*----------------------------------------------------------------------------
3410| Returns the result of adding the double-precision floating-point values `a'
3411| and `b'. The operation is performed according to the IEC/IEEE Standard for
3412| Binary Floating-Point Arithmetic.
3413*----------------------------------------------------------------------------*/
3414
3415float64 float64_add( float64 a, float64 b STATUS_PARAM )
3416{
3417 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003418 a = float64_squash_input_denormal(a STATUS_VAR);
3419 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003420
3421 aSign = extractFloat64Sign( a );
3422 bSign = extractFloat64Sign( b );
3423 if ( aSign == bSign ) {
3424 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3425 }
3426 else {
3427 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3428 }
3429
3430}
3431
3432/*----------------------------------------------------------------------------
3433| Returns the result of subtracting the double-precision floating-point values
3434| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3435| for Binary Floating-Point Arithmetic.
3436*----------------------------------------------------------------------------*/
3437
3438float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3439{
3440 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003441 a = float64_squash_input_denormal(a STATUS_VAR);
3442 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003443
3444 aSign = extractFloat64Sign( a );
3445 bSign = extractFloat64Sign( b );
3446 if ( aSign == bSign ) {
3447 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3448 }
3449 else {
3450 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3451 }
3452
3453}
3454
3455/*----------------------------------------------------------------------------
3456| Returns the result of multiplying the double-precision floating-point values
3457| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3458| for Binary Floating-Point Arithmetic.
3459*----------------------------------------------------------------------------*/
3460
3461float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3462{
3463 flag aSign, bSign, zSign;
3464 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003465 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003466
Peter Maydell37d18662011-01-06 19:37:53 +00003467 a = float64_squash_input_denormal(a STATUS_VAR);
3468 b = float64_squash_input_denormal(b STATUS_VAR);
3469
bellard158142c2005-03-13 16:54:06 +00003470 aSig = extractFloat64Frac( a );
3471 aExp = extractFloat64Exp( a );
3472 aSign = extractFloat64Sign( a );
3473 bSig = extractFloat64Frac( b );
3474 bExp = extractFloat64Exp( b );
3475 bSign = extractFloat64Sign( b );
3476 zSign = aSign ^ bSign;
3477 if ( aExp == 0x7FF ) {
3478 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3479 return propagateFloat64NaN( a, b STATUS_VAR );
3480 }
3481 if ( ( bExp | bSig ) == 0 ) {
3482 float_raise( float_flag_invalid STATUS_VAR);
3483 return float64_default_nan;
3484 }
3485 return packFloat64( zSign, 0x7FF, 0 );
3486 }
3487 if ( bExp == 0x7FF ) {
3488 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3489 if ( ( aExp | aSig ) == 0 ) {
3490 float_raise( float_flag_invalid STATUS_VAR);
3491 return float64_default_nan;
3492 }
3493 return packFloat64( zSign, 0x7FF, 0 );
3494 }
3495 if ( aExp == 0 ) {
3496 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3497 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3498 }
3499 if ( bExp == 0 ) {
3500 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3501 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3502 }
3503 zExp = aExp + bExp - 0x3FF;
3504 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3505 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3506 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3507 zSig0 |= ( zSig1 != 0 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003508 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00003509 zSig0 <<= 1;
3510 --zExp;
3511 }
3512 return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3513
3514}
3515
3516/*----------------------------------------------------------------------------
3517| Returns the result of dividing the double-precision floating-point value `a'
3518| by the corresponding value `b'. The operation is performed according to
3519| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3520*----------------------------------------------------------------------------*/
3521
3522float64 float64_div( float64 a, float64 b STATUS_PARAM )
3523{
3524 flag aSign, bSign, zSign;
3525 int16 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003526 uint64_t aSig, bSig, zSig;
3527 uint64_t rem0, rem1;
3528 uint64_t term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00003529 a = float64_squash_input_denormal(a STATUS_VAR);
3530 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003531
3532 aSig = extractFloat64Frac( a );
3533 aExp = extractFloat64Exp( a );
3534 aSign = extractFloat64Sign( a );
3535 bSig = extractFloat64Frac( b );
3536 bExp = extractFloat64Exp( b );
3537 bSign = extractFloat64Sign( b );
3538 zSign = aSign ^ bSign;
3539 if ( aExp == 0x7FF ) {
3540 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3541 if ( bExp == 0x7FF ) {
3542 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3543 float_raise( float_flag_invalid STATUS_VAR);
3544 return float64_default_nan;
3545 }
3546 return packFloat64( zSign, 0x7FF, 0 );
3547 }
3548 if ( bExp == 0x7FF ) {
3549 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3550 return packFloat64( zSign, 0, 0 );
3551 }
3552 if ( bExp == 0 ) {
3553 if ( bSig == 0 ) {
3554 if ( ( aExp | aSig ) == 0 ) {
3555 float_raise( float_flag_invalid STATUS_VAR);
3556 return float64_default_nan;
3557 }
3558 float_raise( float_flag_divbyzero STATUS_VAR);
3559 return packFloat64( zSign, 0x7FF, 0 );
3560 }
3561 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3562 }
3563 if ( aExp == 0 ) {
3564 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3565 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3566 }
3567 zExp = aExp - bExp + 0x3FD;
3568 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3569 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3570 if ( bSig <= ( aSig + aSig ) ) {
3571 aSig >>= 1;
3572 ++zExp;
3573 }
3574 zSig = estimateDiv128To64( aSig, 0, bSig );
3575 if ( ( zSig & 0x1FF ) <= 2 ) {
3576 mul64To128( bSig, zSig, &term0, &term1 );
3577 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003578 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003579 --zSig;
3580 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3581 }
3582 zSig |= ( rem1 != 0 );
3583 }
3584 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3585
3586}
3587
3588/*----------------------------------------------------------------------------
3589| Returns the remainder of the double-precision floating-point value `a'
3590| with respect to the corresponding value `b'. The operation is performed
3591| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3592*----------------------------------------------------------------------------*/
3593
3594float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3595{
Blue Swirled086f32010-03-07 13:49:58 +00003596 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00003597 int16 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003598 uint64_t aSig, bSig;
3599 uint64_t q, alternateASig;
3600 int64_t sigMean;
bellard158142c2005-03-13 16:54:06 +00003601
Peter Maydell37d18662011-01-06 19:37:53 +00003602 a = float64_squash_input_denormal(a STATUS_VAR);
3603 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003604 aSig = extractFloat64Frac( a );
3605 aExp = extractFloat64Exp( a );
3606 aSign = extractFloat64Sign( a );
3607 bSig = extractFloat64Frac( b );
3608 bExp = extractFloat64Exp( b );
bellard158142c2005-03-13 16:54:06 +00003609 if ( aExp == 0x7FF ) {
3610 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3611 return propagateFloat64NaN( a, b STATUS_VAR );
3612 }
3613 float_raise( float_flag_invalid STATUS_VAR);
3614 return float64_default_nan;
3615 }
3616 if ( bExp == 0x7FF ) {
3617 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3618 return a;
3619 }
3620 if ( bExp == 0 ) {
3621 if ( bSig == 0 ) {
3622 float_raise( float_flag_invalid STATUS_VAR);
3623 return float64_default_nan;
3624 }
3625 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3626 }
3627 if ( aExp == 0 ) {
3628 if ( aSig == 0 ) return a;
3629 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3630 }
3631 expDiff = aExp - bExp;
3632 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3633 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3634 if ( expDiff < 0 ) {
3635 if ( expDiff < -1 ) return a;
3636 aSig >>= 1;
3637 }
3638 q = ( bSig <= aSig );
3639 if ( q ) aSig -= bSig;
3640 expDiff -= 64;
3641 while ( 0 < expDiff ) {
3642 q = estimateDiv128To64( aSig, 0, bSig );
3643 q = ( 2 < q ) ? q - 2 : 0;
3644 aSig = - ( ( bSig>>2 ) * q );
3645 expDiff -= 62;
3646 }
3647 expDiff += 64;
3648 if ( 0 < expDiff ) {
3649 q = estimateDiv128To64( aSig, 0, bSig );
3650 q = ( 2 < q ) ? q - 2 : 0;
3651 q >>= 64 - expDiff;
3652 bSig >>= 2;
3653 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3654 }
3655 else {
3656 aSig >>= 2;
3657 bSig >>= 2;
3658 }
3659 do {
3660 alternateASig = aSig;
3661 ++q;
3662 aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003663 } while ( 0 <= (int64_t) aSig );
bellard158142c2005-03-13 16:54:06 +00003664 sigMean = aSig + alternateASig;
3665 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3666 aSig = alternateASig;
3667 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01003668 zSign = ( (int64_t) aSig < 0 );
bellard158142c2005-03-13 16:54:06 +00003669 if ( zSign ) aSig = - aSig;
3670 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
3671
3672}
3673
3674/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00003675| Returns the result of multiplying the double-precision floating-point values
3676| `a' and `b' then adding 'c', with no intermediate rounding step after the
3677| multiplication. The operation is performed according to the IEC/IEEE
3678| Standard for Binary Floating-Point Arithmetic 754-2008.
3679| The flags argument allows the caller to select negation of the
3680| addend, the intermediate product, or the final result. (The difference
3681| between this and having the caller do a separate negation is that negating
3682| externally will flip the sign bit on NaNs.)
3683*----------------------------------------------------------------------------*/
3684
3685float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
3686{
3687 flag aSign, bSign, cSign, zSign;
3688 int aExp, bExp, cExp, pExp, zExp, expDiff;
3689 uint64_t aSig, bSig, cSig;
3690 flag pInf, pZero, pSign;
3691 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
3692 int shiftcount;
3693 flag signflip, infzero;
3694
3695 a = float64_squash_input_denormal(a STATUS_VAR);
3696 b = float64_squash_input_denormal(b STATUS_VAR);
3697 c = float64_squash_input_denormal(c STATUS_VAR);
3698 aSig = extractFloat64Frac(a);
3699 aExp = extractFloat64Exp(a);
3700 aSign = extractFloat64Sign(a);
3701 bSig = extractFloat64Frac(b);
3702 bExp = extractFloat64Exp(b);
3703 bSign = extractFloat64Sign(b);
3704 cSig = extractFloat64Frac(c);
3705 cExp = extractFloat64Exp(c);
3706 cSign = extractFloat64Sign(c);
3707
3708 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
3709 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
3710
3711 /* It is implementation-defined whether the cases of (0,inf,qnan)
3712 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
3713 * they return if they do), so we have to hand this information
3714 * off to the target-specific pick-a-NaN routine.
3715 */
3716 if (((aExp == 0x7ff) && aSig) ||
3717 ((bExp == 0x7ff) && bSig) ||
3718 ((cExp == 0x7ff) && cSig)) {
3719 return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
3720 }
3721
3722 if (infzero) {
3723 float_raise(float_flag_invalid STATUS_VAR);
3724 return float64_default_nan;
3725 }
3726
3727 if (flags & float_muladd_negate_c) {
3728 cSign ^= 1;
3729 }
3730
3731 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
3732
3733 /* Work out the sign and type of the product */
3734 pSign = aSign ^ bSign;
3735 if (flags & float_muladd_negate_product) {
3736 pSign ^= 1;
3737 }
3738 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
3739 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
3740
3741 if (cExp == 0x7ff) {
3742 if (pInf && (pSign ^ cSign)) {
3743 /* addition of opposite-signed infinities => InvalidOperation */
3744 float_raise(float_flag_invalid STATUS_VAR);
3745 return float64_default_nan;
3746 }
3747 /* Otherwise generate an infinity of the same sign */
3748 return packFloat64(cSign ^ signflip, 0x7ff, 0);
3749 }
3750
3751 if (pInf) {
3752 return packFloat64(pSign ^ signflip, 0x7ff, 0);
3753 }
3754
3755 if (pZero) {
3756 if (cExp == 0) {
3757 if (cSig == 0) {
3758 /* Adding two exact zeroes */
3759 if (pSign == cSign) {
3760 zSign = pSign;
3761 } else if (STATUS(float_rounding_mode) == float_round_down) {
3762 zSign = 1;
3763 } else {
3764 zSign = 0;
3765 }
3766 return packFloat64(zSign ^ signflip, 0, 0);
3767 }
3768 /* Exact zero plus a denorm */
3769 if (STATUS(flush_to_zero)) {
3770 float_raise(float_flag_output_denormal STATUS_VAR);
3771 return packFloat64(cSign ^ signflip, 0, 0);
3772 }
3773 }
3774 /* Zero plus something non-zero : just return the something */
3775 return c ^ ((uint64_t)signflip << 63);
3776 }
3777
3778 if (aExp == 0) {
3779 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
3780 }
3781 if (bExp == 0) {
3782 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
3783 }
3784
3785 /* Calculate the actual result a * b + c */
3786
3787 /* Multiply first; this is easy. */
3788 /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
3789 * because we want the true exponent, not the "one-less-than"
3790 * flavour that roundAndPackFloat64() takes.
3791 */
3792 pExp = aExp + bExp - 0x3fe;
3793 aSig = (aSig | LIT64(0x0010000000000000))<<10;
3794 bSig = (bSig | LIT64(0x0010000000000000))<<11;
3795 mul64To128(aSig, bSig, &pSig0, &pSig1);
3796 if ((int64_t)(pSig0 << 1) >= 0) {
3797 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
3798 pExp--;
3799 }
3800
3801 zSign = pSign ^ signflip;
3802
3803 /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
3804 * bit in position 126.
3805 */
3806 if (cExp == 0) {
3807 if (!cSig) {
3808 /* Throw out the special case of c being an exact zero now */
3809 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
3810 return roundAndPackFloat64(zSign, pExp - 1,
3811 pSig1 STATUS_VAR);
3812 }
3813 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
3814 }
3815
3816 /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
3817 * significand of the addend, with the explicit bit in position 126.
3818 */
3819 cSig0 = cSig << (126 - 64 - 52);
3820 cSig1 = 0;
3821 cSig0 |= LIT64(0x4000000000000000);
3822 expDiff = pExp - cExp;
3823
3824 if (pSign == cSign) {
3825 /* Addition */
3826 if (expDiff > 0) {
3827 /* scale c to match p */
3828 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3829 zExp = pExp;
3830 } else if (expDiff < 0) {
3831 /* scale p to match c */
3832 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3833 zExp = cExp;
3834 } else {
3835 /* no scaling needed */
3836 zExp = cExp;
3837 }
3838 /* Add significands and make sure explicit bit ends up in posn 126 */
3839 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3840 if ((int64_t)zSig0 < 0) {
3841 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
3842 } else {
3843 zExp--;
3844 }
3845 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
3846 return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
3847 } else {
3848 /* Subtraction */
3849 if (expDiff > 0) {
3850 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
3851 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3852 zExp = pExp;
3853 } else if (expDiff < 0) {
3854 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
3855 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3856 zExp = cExp;
3857 zSign ^= 1;
3858 } else {
3859 zExp = pExp;
3860 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
3861 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
3862 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
3863 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
3864 zSign ^= 1;
3865 } else {
3866 /* Exact zero */
3867 zSign = signflip;
3868 if (STATUS(float_rounding_mode) == float_round_down) {
3869 zSign ^= 1;
3870 }
3871 return packFloat64(zSign, 0, 0);
3872 }
3873 }
3874 --zExp;
3875 /* Do the equivalent of normalizeRoundAndPackFloat64() but
3876 * starting with the significand in a pair of uint64_t.
3877 */
3878 if (zSig0) {
3879 shiftcount = countLeadingZeros64(zSig0) - 1;
3880 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
3881 if (zSig1) {
3882 zSig0 |= 1;
3883 }
3884 zExp -= shiftcount;
3885 } else {
3886 shiftcount = countLeadingZeros64(zSig1) - 1;
3887 zSig0 = zSig1 << shiftcount;
3888 zExp -= (shiftcount + 64);
3889 }
3890 return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
3891 }
3892}
3893
3894/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00003895| Returns the square root of the double-precision floating-point value `a'.
3896| The operation is performed according to the IEC/IEEE Standard for Binary
3897| Floating-Point Arithmetic.
3898*----------------------------------------------------------------------------*/
3899
3900float64 float64_sqrt( float64 a STATUS_PARAM )
3901{
3902 flag aSign;
3903 int16 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003904 uint64_t aSig, zSig, doubleZSig;
3905 uint64_t rem0, rem1, term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00003906 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003907
3908 aSig = extractFloat64Frac( a );
3909 aExp = extractFloat64Exp( a );
3910 aSign = extractFloat64Sign( a );
3911 if ( aExp == 0x7FF ) {
3912 if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
3913 if ( ! aSign ) return a;
3914 float_raise( float_flag_invalid STATUS_VAR);
3915 return float64_default_nan;
3916 }
3917 if ( aSign ) {
3918 if ( ( aExp | aSig ) == 0 ) return a;
3919 float_raise( float_flag_invalid STATUS_VAR);
3920 return float64_default_nan;
3921 }
3922 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00003923 if ( aSig == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00003924 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3925 }
3926 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3927 aSig |= LIT64( 0x0010000000000000 );
3928 zSig = estimateSqrt32( aExp, aSig>>21 );
3929 aSig <<= 9 - ( aExp & 1 );
3930 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3931 if ( ( zSig & 0x1FF ) <= 5 ) {
3932 doubleZSig = zSig<<1;
3933 mul64To128( zSig, zSig, &term0, &term1 );
3934 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003935 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003936 --zSig;
3937 doubleZSig -= 2;
3938 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3939 }
3940 zSig |= ( ( rem0 | rem1 ) != 0 );
3941 }
3942 return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
3943
3944}
3945
3946/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00003947| Returns the binary log of the double-precision floating-point value `a'.
3948| The operation is performed according to the IEC/IEEE Standard for Binary
3949| Floating-Point Arithmetic.
3950*----------------------------------------------------------------------------*/
3951float64 float64_log2( float64 a STATUS_PARAM )
3952{
3953 flag aSign, zSign;
3954 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003955 uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell37d18662011-01-06 19:37:53 +00003956 a = float64_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00003957
3958 aSig = extractFloat64Frac( a );
3959 aExp = extractFloat64Exp( a );
3960 aSign = extractFloat64Sign( a );
3961
3962 if ( aExp == 0 ) {
3963 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
3964 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3965 }
3966 if ( aSign ) {
3967 float_raise( float_flag_invalid STATUS_VAR);
3968 return float64_default_nan;
3969 }
3970 if ( aExp == 0x7FF ) {
3971 if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
3972 return a;
3973 }
3974
3975 aExp -= 0x3FF;
3976 aSig |= LIT64( 0x0010000000000000 );
3977 zSign = aExp < 0;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003978 zSig = (uint64_t)aExp << 52;
aurel32374dfc32009-02-05 13:42:47 +00003979 for (i = 1LL << 51; i > 0; i >>= 1) {
3980 mul64To128( aSig, aSig, &aSig0, &aSig1 );
3981 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
3982 if ( aSig & LIT64( 0x0020000000000000 ) ) {
3983 aSig >>= 1;
3984 zSig |= i;
3985 }
3986 }
3987
3988 if ( zSign )
3989 zSig = -zSig;
3990 return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
3991}
3992
3993/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00003994| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnob6893622011-04-14 00:49:29 +02003995| corresponding value `b', and 0 otherwise. The invalid exception is raised
3996| if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00003997| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3998*----------------------------------------------------------------------------*/
3999
Aurelien Jarnob6893622011-04-14 00:49:29 +02004000int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004001{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004002 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004003 a = float64_squash_input_denormal(a STATUS_VAR);
4004 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004005
4006 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4007 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4008 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004009 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004010 return 0;
4011 }
pbrookf090c9d2007-11-18 14:33:24 +00004012 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004013 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004014 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004015
4016}
4017
4018/*----------------------------------------------------------------------------
4019| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004020| equal to the corresponding value `b', and 0 otherwise. The invalid
4021| exception is raised if either operand is a NaN. The comparison is performed
4022| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004023*----------------------------------------------------------------------------*/
4024
bellard750afe92006-10-28 19:27:11 +00004025int float64_le( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004026{
4027 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004028 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004029 a = float64_squash_input_denormal(a STATUS_VAR);
4030 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004031
4032 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4033 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4034 ) {
4035 float_raise( float_flag_invalid STATUS_VAR);
4036 return 0;
4037 }
4038 aSign = extractFloat64Sign( a );
4039 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004040 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004041 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004042 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004043 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004044
4045}
4046
4047/*----------------------------------------------------------------------------
4048| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004049| the corresponding value `b', and 0 otherwise. The invalid exception is
4050| raised if either operand is a NaN. The comparison is performed according
4051| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004052*----------------------------------------------------------------------------*/
4053
bellard750afe92006-10-28 19:27:11 +00004054int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004055{
4056 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004057 uint64_t av, bv;
bellard158142c2005-03-13 16:54:06 +00004058
Peter Maydell37d18662011-01-06 19:37:53 +00004059 a = float64_squash_input_denormal(a STATUS_VAR);
4060 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004061 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4062 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4063 ) {
4064 float_raise( float_flag_invalid STATUS_VAR);
4065 return 0;
4066 }
4067 aSign = extractFloat64Sign( a );
4068 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004069 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004070 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004071 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004072 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004073
4074}
4075
4076/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02004077| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004078| be compared, and 0 otherwise. The invalid exception is raised if either
4079| operand is a NaN. The comparison is performed according to the IEC/IEEE
4080| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02004081*----------------------------------------------------------------------------*/
4082
4083int float64_unordered( float64 a, float64 b STATUS_PARAM )
4084{
4085 a = float64_squash_input_denormal(a STATUS_VAR);
4086 b = float64_squash_input_denormal(b STATUS_VAR);
4087
4088 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4089 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4090 ) {
4091 float_raise( float_flag_invalid STATUS_VAR);
4092 return 1;
4093 }
4094 return 0;
4095}
4096
4097/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004098| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004099| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4100| exception.The comparison is performed according to the IEC/IEEE Standard
4101| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004102*----------------------------------------------------------------------------*/
4103
Aurelien Jarnob6893622011-04-14 00:49:29 +02004104int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004105{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004106 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004107 a = float64_squash_input_denormal(a STATUS_VAR);
4108 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004109
4110 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4111 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4112 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004113 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4114 float_raise( float_flag_invalid STATUS_VAR);
4115 }
bellard158142c2005-03-13 16:54:06 +00004116 return 0;
4117 }
pbrookf090c9d2007-11-18 14:33:24 +00004118 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004119 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004120 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004121
4122}
4123
4124/*----------------------------------------------------------------------------
4125| Returns 1 if the double-precision floating-point value `a' is less than or
4126| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4127| cause an exception. Otherwise, the comparison is performed according to the
4128| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4129*----------------------------------------------------------------------------*/
4130
bellard750afe92006-10-28 19:27:11 +00004131int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004132{
4133 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004134 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004135 a = float64_squash_input_denormal(a STATUS_VAR);
4136 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004137
4138 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4139 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4140 ) {
4141 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4142 float_raise( float_flag_invalid STATUS_VAR);
4143 }
4144 return 0;
4145 }
4146 aSign = extractFloat64Sign( a );
4147 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004148 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004149 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004150 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004151 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004152
4153}
4154
4155/*----------------------------------------------------------------------------
4156| Returns 1 if the double-precision floating-point value `a' is less than
4157| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4158| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4159| Standard for Binary Floating-Point Arithmetic.
4160*----------------------------------------------------------------------------*/
4161
bellard750afe92006-10-28 19:27:11 +00004162int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004163{
4164 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004165 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004166 a = float64_squash_input_denormal(a STATUS_VAR);
4167 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004168
4169 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4170 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4171 ) {
4172 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4173 float_raise( float_flag_invalid STATUS_VAR);
4174 }
4175 return 0;
4176 }
4177 aSign = extractFloat64Sign( a );
4178 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004179 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004180 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004181 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004182 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004183
4184}
4185
Aurelien Jarno67b78612011-04-14 00:49:29 +02004186/*----------------------------------------------------------------------------
4187| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4188| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4189| comparison is performed according to the IEC/IEEE Standard for Binary
4190| Floating-Point Arithmetic.
4191*----------------------------------------------------------------------------*/
4192
4193int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
4194{
4195 a = float64_squash_input_denormal(a STATUS_VAR);
4196 b = float64_squash_input_denormal(b STATUS_VAR);
4197
4198 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4199 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4200 ) {
4201 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4202 float_raise( float_flag_invalid STATUS_VAR);
4203 }
4204 return 1;
4205 }
4206 return 0;
4207}
4208
bellard158142c2005-03-13 16:54:06 +00004209/*----------------------------------------------------------------------------
4210| Returns the result of converting the extended double-precision floating-
4211| point value `a' to the 32-bit two's complement integer format. The
4212| conversion is performed according to the IEC/IEEE Standard for Binary
4213| Floating-Point Arithmetic---which means in particular that the conversion
4214| is rounded according to the current rounding mode. If `a' is a NaN, the
4215| largest positive integer is returned. Otherwise, if the conversion
4216| overflows, the largest integer with the same sign as `a' is returned.
4217*----------------------------------------------------------------------------*/
4218
4219int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
4220{
4221 flag aSign;
4222 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004223 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004224
4225 aSig = extractFloatx80Frac( a );
4226 aExp = extractFloatx80Exp( a );
4227 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004228 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004229 shiftCount = 0x4037 - aExp;
4230 if ( shiftCount <= 0 ) shiftCount = 1;
4231 shift64RightJamming( aSig, shiftCount, &aSig );
4232 return roundAndPackInt32( aSign, aSig STATUS_VAR );
4233
4234}
4235
4236/*----------------------------------------------------------------------------
4237| Returns the result of converting the extended double-precision floating-
4238| point value `a' to the 32-bit two's complement integer format. The
4239| conversion is performed according to the IEC/IEEE Standard for Binary
4240| Floating-Point Arithmetic, except that the conversion is always rounded
4241| toward zero. If `a' is a NaN, the largest positive integer is returned.
4242| Otherwise, if the conversion overflows, the largest integer with the same
4243| sign as `a' is returned.
4244*----------------------------------------------------------------------------*/
4245
4246int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
4247{
4248 flag aSign;
4249 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004250 uint64_t aSig, savedASig;
bellard158142c2005-03-13 16:54:06 +00004251 int32 z;
4252
4253 aSig = extractFloatx80Frac( a );
4254 aExp = extractFloatx80Exp( a );
4255 aSign = extractFloatx80Sign( a );
4256 if ( 0x401E < aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004257 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004258 goto invalid;
4259 }
4260 else if ( aExp < 0x3FFF ) {
4261 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4262 return 0;
4263 }
4264 shiftCount = 0x403E - aExp;
4265 savedASig = aSig;
4266 aSig >>= shiftCount;
4267 z = aSig;
4268 if ( aSign ) z = - z;
4269 if ( ( z < 0 ) ^ aSign ) {
4270 invalid:
4271 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004272 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00004273 }
4274 if ( ( aSig<<shiftCount ) != savedASig ) {
4275 STATUS(float_exception_flags) |= float_flag_inexact;
4276 }
4277 return z;
4278
4279}
4280
4281/*----------------------------------------------------------------------------
4282| Returns the result of converting the extended double-precision floating-
4283| point value `a' to the 64-bit two's complement integer format. The
4284| conversion is performed according to the IEC/IEEE Standard for Binary
4285| Floating-Point Arithmetic---which means in particular that the conversion
4286| is rounded according to the current rounding mode. If `a' is a NaN,
4287| the largest positive integer is returned. Otherwise, if the conversion
4288| overflows, the largest integer with the same sign as `a' is returned.
4289*----------------------------------------------------------------------------*/
4290
4291int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
4292{
4293 flag aSign;
4294 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004295 uint64_t aSig, aSigExtra;
bellard158142c2005-03-13 16:54:06 +00004296
4297 aSig = extractFloatx80Frac( a );
4298 aExp = extractFloatx80Exp( a );
4299 aSign = extractFloatx80Sign( a );
4300 shiftCount = 0x403E - aExp;
4301 if ( shiftCount <= 0 ) {
4302 if ( shiftCount ) {
4303 float_raise( float_flag_invalid STATUS_VAR);
4304 if ( ! aSign
4305 || ( ( aExp == 0x7FFF )
4306 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4307 ) {
4308 return LIT64( 0x7FFFFFFFFFFFFFFF );
4309 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004310 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004311 }
4312 aSigExtra = 0;
4313 }
4314 else {
4315 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4316 }
4317 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
4318
4319}
4320
4321/*----------------------------------------------------------------------------
4322| Returns the result of converting the extended double-precision floating-
4323| point value `a' to the 64-bit two's complement integer format. The
4324| conversion is performed according to the IEC/IEEE Standard for Binary
4325| Floating-Point Arithmetic, except that the conversion is always rounded
4326| toward zero. If `a' is a NaN, the largest positive integer is returned.
4327| Otherwise, if the conversion overflows, the largest integer with the same
4328| sign as `a' is returned.
4329*----------------------------------------------------------------------------*/
4330
4331int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
4332{
4333 flag aSign;
4334 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004335 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004336 int64 z;
4337
4338 aSig = extractFloatx80Frac( a );
4339 aExp = extractFloatx80Exp( a );
4340 aSign = extractFloatx80Sign( a );
4341 shiftCount = aExp - 0x403E;
4342 if ( 0 <= shiftCount ) {
4343 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4344 if ( ( a.high != 0xC03E ) || aSig ) {
4345 float_raise( float_flag_invalid STATUS_VAR);
4346 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4347 return LIT64( 0x7FFFFFFFFFFFFFFF );
4348 }
4349 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004350 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004351 }
4352 else if ( aExp < 0x3FFF ) {
4353 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4354 return 0;
4355 }
4356 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004357 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004358 STATUS(float_exception_flags) |= float_flag_inexact;
4359 }
4360 if ( aSign ) z = - z;
4361 return z;
4362
4363}
4364
4365/*----------------------------------------------------------------------------
4366| Returns the result of converting the extended double-precision floating-
4367| point value `a' to the single-precision floating-point format. The
4368| conversion is performed according to the IEC/IEEE Standard for Binary
4369| Floating-Point Arithmetic.
4370*----------------------------------------------------------------------------*/
4371
4372float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
4373{
4374 flag aSign;
4375 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004376 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004377
4378 aSig = extractFloatx80Frac( a );
4379 aExp = extractFloatx80Exp( a );
4380 aSign = extractFloatx80Sign( a );
4381 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004382 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004383 return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004384 }
4385 return packFloat32( aSign, 0xFF, 0 );
4386 }
4387 shift64RightJamming( aSig, 33, &aSig );
4388 if ( aExp || aSig ) aExp -= 0x3F81;
4389 return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
4390
4391}
4392
4393/*----------------------------------------------------------------------------
4394| Returns the result of converting the extended double-precision floating-
4395| point value `a' to the double-precision floating-point format. The
4396| conversion is performed according to the IEC/IEEE Standard for Binary
4397| Floating-Point Arithmetic.
4398*----------------------------------------------------------------------------*/
4399
4400float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
4401{
4402 flag aSign;
4403 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004404 uint64_t aSig, zSig;
bellard158142c2005-03-13 16:54:06 +00004405
4406 aSig = extractFloatx80Frac( a );
4407 aExp = extractFloatx80Exp( a );
4408 aSign = extractFloatx80Sign( a );
4409 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004410 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004411 return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004412 }
4413 return packFloat64( aSign, 0x7FF, 0 );
4414 }
4415 shift64RightJamming( aSig, 1, &zSig );
4416 if ( aExp || aSig ) aExp -= 0x3C01;
4417 return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
4418
4419}
4420
bellard158142c2005-03-13 16:54:06 +00004421/*----------------------------------------------------------------------------
4422| Returns the result of converting the extended double-precision floating-
4423| point value `a' to the quadruple-precision floating-point format. The
4424| conversion is performed according to the IEC/IEEE Standard for Binary
4425| Floating-Point Arithmetic.
4426*----------------------------------------------------------------------------*/
4427
4428float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
4429{
4430 flag aSign;
4431 int16 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004432 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004433
4434 aSig = extractFloatx80Frac( a );
4435 aExp = extractFloatx80Exp( a );
4436 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004437 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004438 return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004439 }
4440 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4441 return packFloat128( aSign, aExp, zSig0, zSig1 );
4442
4443}
4444
bellard158142c2005-03-13 16:54:06 +00004445/*----------------------------------------------------------------------------
4446| Rounds the extended double-precision floating-point value `a' to an integer,
4447| and returns the result as an extended quadruple-precision floating-point
4448| value. The operation is performed according to the IEC/IEEE Standard for
4449| Binary Floating-Point Arithmetic.
4450*----------------------------------------------------------------------------*/
4451
4452floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
4453{
4454 flag aSign;
4455 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004456 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00004457 int8 roundingMode;
4458 floatx80 z;
4459
4460 aExp = extractFloatx80Exp( a );
4461 if ( 0x403E <= aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004462 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004463 return propagateFloatx80NaN( a, a STATUS_VAR );
4464 }
4465 return a;
4466 }
4467 if ( aExp < 0x3FFF ) {
4468 if ( ( aExp == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01004469 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard158142c2005-03-13 16:54:06 +00004470 return a;
4471 }
4472 STATUS(float_exception_flags) |= float_flag_inexact;
4473 aSign = extractFloatx80Sign( a );
4474 switch ( STATUS(float_rounding_mode) ) {
4475 case float_round_nearest_even:
Andreas Färberbb98fe42011-03-07 01:34:06 +01004476 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard158142c2005-03-13 16:54:06 +00004477 ) {
4478 return
4479 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4480 }
4481 break;
4482 case float_round_down:
4483 return
4484 aSign ?
4485 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4486 : packFloatx80( 0, 0, 0 );
4487 case float_round_up:
4488 return
4489 aSign ? packFloatx80( 1, 0, 0 )
4490 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4491 }
4492 return packFloatx80( aSign, 0, 0 );
4493 }
4494 lastBitMask = 1;
4495 lastBitMask <<= 0x403E - aExp;
4496 roundBitsMask = lastBitMask - 1;
4497 z = a;
4498 roundingMode = STATUS(float_rounding_mode);
4499 if ( roundingMode == float_round_nearest_even ) {
4500 z.low += lastBitMask>>1;
4501 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4502 }
4503 else if ( roundingMode != float_round_to_zero ) {
4504 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
4505 z.low += roundBitsMask;
4506 }
4507 }
4508 z.low &= ~ roundBitsMask;
4509 if ( z.low == 0 ) {
4510 ++z.high;
4511 z.low = LIT64( 0x8000000000000000 );
4512 }
4513 if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
4514 return z;
4515
4516}
4517
4518/*----------------------------------------------------------------------------
4519| Returns the result of adding the absolute values of the extended double-
4520| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4521| negated before being returned. `zSign' is ignored if the result is a NaN.
4522| The addition is performed according to the IEC/IEEE Standard for Binary
4523| Floating-Point Arithmetic.
4524*----------------------------------------------------------------------------*/
4525
4526static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
4527{
4528 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004529 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004530 int32 expDiff;
4531
4532 aSig = extractFloatx80Frac( a );
4533 aExp = extractFloatx80Exp( a );
4534 bSig = extractFloatx80Frac( b );
4535 bExp = extractFloatx80Exp( b );
4536 expDiff = aExp - bExp;
4537 if ( 0 < expDiff ) {
4538 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004539 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004540 return a;
4541 }
4542 if ( bExp == 0 ) --expDiff;
4543 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4544 zExp = aExp;
4545 }
4546 else if ( expDiff < 0 ) {
4547 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004548 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004549 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4550 }
4551 if ( aExp == 0 ) ++expDiff;
4552 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4553 zExp = bExp;
4554 }
4555 else {
4556 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004557 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004558 return propagateFloatx80NaN( a, b STATUS_VAR );
4559 }
4560 return a;
4561 }
4562 zSig1 = 0;
4563 zSig0 = aSig + bSig;
4564 if ( aExp == 0 ) {
4565 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4566 goto roundAndPack;
4567 }
4568 zExp = aExp;
4569 goto shiftRight1;
4570 }
4571 zSig0 = aSig + bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004572 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard158142c2005-03-13 16:54:06 +00004573 shiftRight1:
4574 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4575 zSig0 |= LIT64( 0x8000000000000000 );
4576 ++zExp;
4577 roundAndPack:
4578 return
4579 roundAndPackFloatx80(
4580 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4581
4582}
4583
4584/*----------------------------------------------------------------------------
4585| Returns the result of subtracting the absolute values of the extended
4586| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4587| difference is negated before being returned. `zSign' is ignored if the
4588| result is a NaN. The subtraction is performed according to the IEC/IEEE
4589| Standard for Binary Floating-Point Arithmetic.
4590*----------------------------------------------------------------------------*/
4591
4592static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4593{
4594 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004595 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004596 int32 expDiff;
4597 floatx80 z;
4598
4599 aSig = extractFloatx80Frac( a );
4600 aExp = extractFloatx80Exp( a );
4601 bSig = extractFloatx80Frac( b );
4602 bExp = extractFloatx80Exp( b );
4603 expDiff = aExp - bExp;
4604 if ( 0 < expDiff ) goto aExpBigger;
4605 if ( expDiff < 0 ) goto bExpBigger;
4606 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004607 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004608 return propagateFloatx80NaN( a, b STATUS_VAR );
4609 }
4610 float_raise( float_flag_invalid STATUS_VAR);
4611 z.low = floatx80_default_nan_low;
4612 z.high = floatx80_default_nan_high;
4613 return z;
4614 }
4615 if ( aExp == 0 ) {
4616 aExp = 1;
4617 bExp = 1;
4618 }
4619 zSig1 = 0;
4620 if ( bSig < aSig ) goto aBigger;
4621 if ( aSig < bSig ) goto bBigger;
4622 return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
4623 bExpBigger:
4624 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004625 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004626 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
4627 }
4628 if ( aExp == 0 ) ++expDiff;
4629 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4630 bBigger:
4631 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4632 zExp = bExp;
4633 zSign ^= 1;
4634 goto normalizeRoundAndPack;
4635 aExpBigger:
4636 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004637 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004638 return a;
4639 }
4640 if ( bExp == 0 ) --expDiff;
4641 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4642 aBigger:
4643 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4644 zExp = aExp;
4645 normalizeRoundAndPack:
4646 return
4647 normalizeRoundAndPackFloatx80(
4648 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4649
4650}
4651
4652/*----------------------------------------------------------------------------
4653| Returns the result of adding the extended double-precision floating-point
4654| values `a' and `b'. The operation is performed according to the IEC/IEEE
4655| Standard for Binary Floating-Point Arithmetic.
4656*----------------------------------------------------------------------------*/
4657
4658floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
4659{
4660 flag aSign, bSign;
4661
4662 aSign = extractFloatx80Sign( a );
4663 bSign = extractFloatx80Sign( b );
4664 if ( aSign == bSign ) {
4665 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4666 }
4667 else {
4668 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4669 }
4670
4671}
4672
4673/*----------------------------------------------------------------------------
4674| Returns the result of subtracting the extended double-precision floating-
4675| point values `a' and `b'. The operation is performed according to the
4676| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4677*----------------------------------------------------------------------------*/
4678
4679floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
4680{
4681 flag aSign, bSign;
4682
4683 aSign = extractFloatx80Sign( a );
4684 bSign = extractFloatx80Sign( b );
4685 if ( aSign == bSign ) {
4686 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
4687 }
4688 else {
4689 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
4690 }
4691
4692}
4693
4694/*----------------------------------------------------------------------------
4695| Returns the result of multiplying the extended double-precision floating-
4696| point values `a' and `b'. The operation is performed according to the
4697| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4698*----------------------------------------------------------------------------*/
4699
4700floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
4701{
4702 flag aSign, bSign, zSign;
4703 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004704 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004705 floatx80 z;
4706
4707 aSig = extractFloatx80Frac( a );
4708 aExp = extractFloatx80Exp( a );
4709 aSign = extractFloatx80Sign( a );
4710 bSig = extractFloatx80Frac( b );
4711 bExp = extractFloatx80Exp( b );
4712 bSign = extractFloatx80Sign( b );
4713 zSign = aSign ^ bSign;
4714 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004715 if ( (uint64_t) ( aSig<<1 )
4716 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004717 return propagateFloatx80NaN( a, b STATUS_VAR );
4718 }
4719 if ( ( bExp | bSig ) == 0 ) goto invalid;
4720 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4721 }
4722 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004723 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004724 if ( ( aExp | aSig ) == 0 ) {
4725 invalid:
4726 float_raise( float_flag_invalid STATUS_VAR);
4727 z.low = floatx80_default_nan_low;
4728 z.high = floatx80_default_nan_high;
4729 return z;
4730 }
4731 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4732 }
4733 if ( aExp == 0 ) {
4734 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4735 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4736 }
4737 if ( bExp == 0 ) {
4738 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4739 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4740 }
4741 zExp = aExp + bExp - 0x3FFE;
4742 mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004743 if ( 0 < (int64_t) zSig0 ) {
bellard158142c2005-03-13 16:54:06 +00004744 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4745 --zExp;
4746 }
4747 return
4748 roundAndPackFloatx80(
4749 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4750
4751}
4752
4753/*----------------------------------------------------------------------------
4754| Returns the result of dividing the extended double-precision floating-point
4755| value `a' by the corresponding value `b'. The operation is performed
4756| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4757*----------------------------------------------------------------------------*/
4758
4759floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
4760{
4761 flag aSign, bSign, zSign;
4762 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004763 uint64_t aSig, bSig, zSig0, zSig1;
4764 uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard158142c2005-03-13 16:54:06 +00004765 floatx80 z;
4766
4767 aSig = extractFloatx80Frac( a );
4768 aExp = extractFloatx80Exp( a );
4769 aSign = extractFloatx80Sign( a );
4770 bSig = extractFloatx80Frac( b );
4771 bExp = extractFloatx80Exp( b );
4772 bSign = extractFloatx80Sign( b );
4773 zSign = aSign ^ bSign;
4774 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004775 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004776 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004777 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004778 goto invalid;
4779 }
4780 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4781 }
4782 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004783 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004784 return packFloatx80( zSign, 0, 0 );
4785 }
4786 if ( bExp == 0 ) {
4787 if ( bSig == 0 ) {
4788 if ( ( aExp | aSig ) == 0 ) {
4789 invalid:
4790 float_raise( float_flag_invalid STATUS_VAR);
4791 z.low = floatx80_default_nan_low;
4792 z.high = floatx80_default_nan_high;
4793 return z;
4794 }
4795 float_raise( float_flag_divbyzero STATUS_VAR);
4796 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4797 }
4798 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4799 }
4800 if ( aExp == 0 ) {
4801 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4802 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4803 }
4804 zExp = aExp - bExp + 0x3FFE;
4805 rem1 = 0;
4806 if ( bSig <= aSig ) {
4807 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4808 ++zExp;
4809 }
4810 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4811 mul64To128( bSig, zSig0, &term0, &term1 );
4812 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004813 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004814 --zSig0;
4815 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4816 }
4817 zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004818 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard158142c2005-03-13 16:54:06 +00004819 mul64To128( bSig, zSig1, &term1, &term2 );
4820 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004821 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004822 --zSig1;
4823 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4824 }
4825 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4826 }
4827 return
4828 roundAndPackFloatx80(
4829 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4830
4831}
4832
4833/*----------------------------------------------------------------------------
4834| Returns the remainder of the extended double-precision floating-point value
4835| `a' with respect to the corresponding value `b'. The operation is performed
4836| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4837*----------------------------------------------------------------------------*/
4838
4839floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
4840{
Blue Swirled086f32010-03-07 13:49:58 +00004841 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00004842 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004843 uint64_t aSig0, aSig1, bSig;
4844 uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard158142c2005-03-13 16:54:06 +00004845 floatx80 z;
4846
4847 aSig0 = extractFloatx80Frac( a );
4848 aExp = extractFloatx80Exp( a );
4849 aSign = extractFloatx80Sign( a );
4850 bSig = extractFloatx80Frac( b );
4851 bExp = extractFloatx80Exp( b );
bellard158142c2005-03-13 16:54:06 +00004852 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004853 if ( (uint64_t) ( aSig0<<1 )
4854 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004855 return propagateFloatx80NaN( a, b STATUS_VAR );
4856 }
4857 goto invalid;
4858 }
4859 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004860 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004861 return a;
4862 }
4863 if ( bExp == 0 ) {
4864 if ( bSig == 0 ) {
4865 invalid:
4866 float_raise( float_flag_invalid STATUS_VAR);
4867 z.low = floatx80_default_nan_low;
4868 z.high = floatx80_default_nan_high;
4869 return z;
4870 }
4871 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4872 }
4873 if ( aExp == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004874 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00004875 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4876 }
4877 bSig |= LIT64( 0x8000000000000000 );
4878 zSign = aSign;
4879 expDiff = aExp - bExp;
4880 aSig1 = 0;
4881 if ( expDiff < 0 ) {
4882 if ( expDiff < -1 ) return a;
4883 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4884 expDiff = 0;
4885 }
4886 q = ( bSig <= aSig0 );
4887 if ( q ) aSig0 -= bSig;
4888 expDiff -= 64;
4889 while ( 0 < expDiff ) {
4890 q = estimateDiv128To64( aSig0, aSig1, bSig );
4891 q = ( 2 < q ) ? q - 2 : 0;
4892 mul64To128( bSig, q, &term0, &term1 );
4893 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4894 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4895 expDiff -= 62;
4896 }
4897 expDiff += 64;
4898 if ( 0 < expDiff ) {
4899 q = estimateDiv128To64( aSig0, aSig1, bSig );
4900 q = ( 2 < q ) ? q - 2 : 0;
4901 q >>= 64 - expDiff;
4902 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4903 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4904 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4905 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4906 ++q;
4907 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4908 }
4909 }
4910 else {
4911 term1 = 0;
4912 term0 = bSig;
4913 }
4914 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4915 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4916 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4917 && ( q & 1 ) )
4918 ) {
4919 aSig0 = alternateASig0;
4920 aSig1 = alternateASig1;
4921 zSign = ! zSign;
4922 }
4923 return
4924 normalizeRoundAndPackFloatx80(
4925 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
4926
4927}
4928
4929/*----------------------------------------------------------------------------
4930| Returns the square root of the extended double-precision floating-point
4931| value `a'. The operation is performed according to the IEC/IEEE Standard
4932| for Binary Floating-Point Arithmetic.
4933*----------------------------------------------------------------------------*/
4934
4935floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
4936{
4937 flag aSign;
4938 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004939 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4940 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00004941 floatx80 z;
4942
4943 aSig0 = extractFloatx80Frac( a );
4944 aExp = extractFloatx80Exp( a );
4945 aSign = extractFloatx80Sign( a );
4946 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004947 if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004948 if ( ! aSign ) return a;
4949 goto invalid;
4950 }
4951 if ( aSign ) {
4952 if ( ( aExp | aSig0 ) == 0 ) return a;
4953 invalid:
4954 float_raise( float_flag_invalid STATUS_VAR);
4955 z.low = floatx80_default_nan_low;
4956 z.high = floatx80_default_nan_high;
4957 return z;
4958 }
4959 if ( aExp == 0 ) {
4960 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4961 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4962 }
4963 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4964 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4965 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4966 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4967 doubleZSig0 = zSig0<<1;
4968 mul64To128( zSig0, zSig0, &term0, &term1 );
4969 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004970 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004971 --zSig0;
4972 doubleZSig0 -= 2;
4973 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4974 }
4975 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4976 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4977 if ( zSig1 == 0 ) zSig1 = 1;
4978 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4979 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4980 mul64To128( zSig1, zSig1, &term2, &term3 );
4981 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004982 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004983 --zSig1;
4984 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4985 term3 |= 1;
4986 term2 |= doubleZSig0;
4987 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4988 }
4989 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4990 }
4991 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4992 zSig0 |= doubleZSig0;
4993 return
4994 roundAndPackFloatx80(
4995 STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
4996
4997}
4998
4999/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005000| Returns 1 if the extended double-precision floating-point value `a' is equal
5001| to the corresponding value `b', and 0 otherwise. The invalid exception is
5002| raised if either operand is a NaN. Otherwise, the comparison is performed
5003| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005004*----------------------------------------------------------------------------*/
5005
Aurelien Jarnob6893622011-04-14 00:49:29 +02005006int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005007{
5008
5009 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005010 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005011 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005012 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005013 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005014 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00005015 return 0;
5016 }
5017 return
5018 ( a.low == b.low )
5019 && ( ( a.high == b.high )
5020 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005021 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005022 );
5023
5024}
5025
5026/*----------------------------------------------------------------------------
5027| Returns 1 if the extended double-precision floating-point value `a' is
5028| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005029| invalid exception is raised if either operand is a NaN. The comparison is
5030| performed according to the IEC/IEEE Standard for Binary Floating-Point
5031| Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005032*----------------------------------------------------------------------------*/
5033
bellard750afe92006-10-28 19:27:11 +00005034int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005035{
5036 flag aSign, bSign;
5037
5038 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005039 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005040 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005041 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005042 ) {
5043 float_raise( float_flag_invalid STATUS_VAR);
5044 return 0;
5045 }
5046 aSign = extractFloatx80Sign( a );
5047 bSign = extractFloatx80Sign( b );
5048 if ( aSign != bSign ) {
5049 return
5050 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005051 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005052 == 0 );
5053 }
5054 return
5055 aSign ? le128( b.high, b.low, a.high, a.low )
5056 : le128( a.high, a.low, b.high, b.low );
5057
5058}
5059
5060/*----------------------------------------------------------------------------
5061| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005062| less than the corresponding value `b', and 0 otherwise. The invalid
5063| exception is raised if either operand is a NaN. The comparison is performed
5064| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005065*----------------------------------------------------------------------------*/
5066
bellard750afe92006-10-28 19:27:11 +00005067int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005068{
5069 flag aSign, bSign;
5070
5071 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005072 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005073 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005074 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005075 ) {
5076 float_raise( float_flag_invalid STATUS_VAR);
5077 return 0;
5078 }
5079 aSign = extractFloatx80Sign( a );
5080 bSign = extractFloatx80Sign( b );
5081 if ( aSign != bSign ) {
5082 return
5083 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005084 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005085 != 0 );
5086 }
5087 return
5088 aSign ? lt128( b.high, b.low, a.high, a.low )
5089 : lt128( a.high, a.low, b.high, b.low );
5090
5091}
5092
5093/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02005094| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005095| cannot be compared, and 0 otherwise. The invalid exception is raised if
5096| either operand is a NaN. The comparison is performed according to the
5097| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02005098*----------------------------------------------------------------------------*/
5099int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
5100{
5101 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5102 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5103 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5104 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5105 ) {
5106 float_raise( float_flag_invalid STATUS_VAR);
5107 return 1;
5108 }
5109 return 0;
5110}
5111
5112/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005113| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005114| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5115| cause an exception. The comparison is performed according to the IEC/IEEE
5116| Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005117*----------------------------------------------------------------------------*/
5118
Aurelien Jarnob6893622011-04-14 00:49:29 +02005119int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005120{
5121
5122 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005123 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005124 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005125 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005126 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005127 if ( floatx80_is_signaling_nan( a )
5128 || floatx80_is_signaling_nan( b ) ) {
5129 float_raise( float_flag_invalid STATUS_VAR);
5130 }
bellard158142c2005-03-13 16:54:06 +00005131 return 0;
5132 }
5133 return
5134 ( a.low == b.low )
5135 && ( ( a.high == b.high )
5136 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005137 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005138 );
5139
5140}
5141
5142/*----------------------------------------------------------------------------
5143| Returns 1 if the extended double-precision floating-point value `a' is less
5144| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5145| do not cause an exception. Otherwise, the comparison is performed according
5146| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5147*----------------------------------------------------------------------------*/
5148
bellard750afe92006-10-28 19:27:11 +00005149int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005150{
5151 flag aSign, bSign;
5152
5153 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005154 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005155 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005156 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005157 ) {
5158 if ( floatx80_is_signaling_nan( a )
5159 || floatx80_is_signaling_nan( b ) ) {
5160 float_raise( float_flag_invalid STATUS_VAR);
5161 }
5162 return 0;
5163 }
5164 aSign = extractFloatx80Sign( a );
5165 bSign = extractFloatx80Sign( b );
5166 if ( aSign != bSign ) {
5167 return
5168 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005169 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005170 == 0 );
5171 }
5172 return
5173 aSign ? le128( b.high, b.low, a.high, a.low )
5174 : le128( a.high, a.low, b.high, b.low );
5175
5176}
5177
5178/*----------------------------------------------------------------------------
5179| Returns 1 if the extended double-precision floating-point value `a' is less
5180| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5181| an exception. Otherwise, the comparison is performed according to the
5182| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5183*----------------------------------------------------------------------------*/
5184
bellard750afe92006-10-28 19:27:11 +00005185int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005186{
5187 flag aSign, bSign;
5188
5189 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005190 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005191 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005192 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005193 ) {
5194 if ( floatx80_is_signaling_nan( a )
5195 || floatx80_is_signaling_nan( b ) ) {
5196 float_raise( float_flag_invalid STATUS_VAR);
5197 }
5198 return 0;
5199 }
5200 aSign = extractFloatx80Sign( a );
5201 bSign = extractFloatx80Sign( b );
5202 if ( aSign != bSign ) {
5203 return
5204 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005205 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005206 != 0 );
5207 }
5208 return
5209 aSign ? lt128( b.high, b.low, a.high, a.low )
5210 : lt128( a.high, a.low, b.high, b.low );
5211
5212}
5213
Aurelien Jarno67b78612011-04-14 00:49:29 +02005214/*----------------------------------------------------------------------------
5215| Returns 1 if the extended double-precision floating-point values `a' and `b'
5216| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5217| The comparison is performed according to the IEC/IEEE Standard for Binary
5218| Floating-Point Arithmetic.
5219*----------------------------------------------------------------------------*/
5220int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5221{
5222 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5223 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5224 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5225 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5226 ) {
5227 if ( floatx80_is_signaling_nan( a )
5228 || floatx80_is_signaling_nan( b ) ) {
5229 float_raise( float_flag_invalid STATUS_VAR);
5230 }
5231 return 1;
5232 }
5233 return 0;
5234}
5235
bellard158142c2005-03-13 16:54:06 +00005236/*----------------------------------------------------------------------------
5237| Returns the result of converting the quadruple-precision floating-point
5238| value `a' to the 32-bit two's complement integer format. The conversion
5239| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5240| Arithmetic---which means in particular that the conversion is rounded
5241| according to the current rounding mode. If `a' is a NaN, the largest
5242| positive integer is returned. Otherwise, if the conversion overflows, the
5243| largest integer with the same sign as `a' is returned.
5244*----------------------------------------------------------------------------*/
5245
5246int32 float128_to_int32( float128 a STATUS_PARAM )
5247{
5248 flag aSign;
5249 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005250 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005251
5252 aSig1 = extractFloat128Frac1( a );
5253 aSig0 = extractFloat128Frac0( a );
5254 aExp = extractFloat128Exp( a );
5255 aSign = extractFloat128Sign( a );
5256 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5257 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5258 aSig0 |= ( aSig1 != 0 );
5259 shiftCount = 0x4028 - aExp;
5260 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5261 return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
5262
5263}
5264
5265/*----------------------------------------------------------------------------
5266| Returns the result of converting the quadruple-precision floating-point
5267| value `a' to the 32-bit two's complement integer format. The conversion
5268| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5269| Arithmetic, except that the conversion is always rounded toward zero. If
5270| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5271| conversion overflows, the largest integer with the same sign as `a' is
5272| returned.
5273*----------------------------------------------------------------------------*/
5274
5275int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
5276{
5277 flag aSign;
5278 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005279 uint64_t aSig0, aSig1, savedASig;
bellard158142c2005-03-13 16:54:06 +00005280 int32 z;
5281
5282 aSig1 = extractFloat128Frac1( a );
5283 aSig0 = extractFloat128Frac0( a );
5284 aExp = extractFloat128Exp( a );
5285 aSign = extractFloat128Sign( a );
5286 aSig0 |= ( aSig1 != 0 );
5287 if ( 0x401E < aExp ) {
5288 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5289 goto invalid;
5290 }
5291 else if ( aExp < 0x3FFF ) {
5292 if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
5293 return 0;
5294 }
5295 aSig0 |= LIT64( 0x0001000000000000 );
5296 shiftCount = 0x402F - aExp;
5297 savedASig = aSig0;
5298 aSig0 >>= shiftCount;
5299 z = aSig0;
5300 if ( aSign ) z = - z;
5301 if ( ( z < 0 ) ^ aSign ) {
5302 invalid:
5303 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01005304 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00005305 }
5306 if ( ( aSig0<<shiftCount ) != savedASig ) {
5307 STATUS(float_exception_flags) |= float_flag_inexact;
5308 }
5309 return z;
5310
5311}
5312
5313/*----------------------------------------------------------------------------
5314| Returns the result of converting the quadruple-precision floating-point
5315| value `a' to the 64-bit two's complement integer format. The conversion
5316| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5317| Arithmetic---which means in particular that the conversion is rounded
5318| according to the current rounding mode. If `a' is a NaN, the largest
5319| positive integer is returned. Otherwise, if the conversion overflows, the
5320| largest integer with the same sign as `a' is returned.
5321*----------------------------------------------------------------------------*/
5322
5323int64 float128_to_int64( float128 a STATUS_PARAM )
5324{
5325 flag aSign;
5326 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005327 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005328
5329 aSig1 = extractFloat128Frac1( a );
5330 aSig0 = extractFloat128Frac0( a );
5331 aExp = extractFloat128Exp( a );
5332 aSign = extractFloat128Sign( a );
5333 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5334 shiftCount = 0x402F - aExp;
5335 if ( shiftCount <= 0 ) {
5336 if ( 0x403E < aExp ) {
5337 float_raise( float_flag_invalid STATUS_VAR);
5338 if ( ! aSign
5339 || ( ( aExp == 0x7FFF )
5340 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5341 )
5342 ) {
5343 return LIT64( 0x7FFFFFFFFFFFFFFF );
5344 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005345 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005346 }
5347 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5348 }
5349 else {
5350 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5351 }
5352 return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
5353
5354}
5355
5356/*----------------------------------------------------------------------------
5357| Returns the result of converting the quadruple-precision floating-point
5358| value `a' to the 64-bit two's complement integer format. The conversion
5359| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5360| Arithmetic, except that the conversion is always rounded toward zero.
5361| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5362| the conversion overflows, the largest integer with the same sign as `a' is
5363| returned.
5364*----------------------------------------------------------------------------*/
5365
5366int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
5367{
5368 flag aSign;
5369 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005370 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005371 int64 z;
5372
5373 aSig1 = extractFloat128Frac1( a );
5374 aSig0 = extractFloat128Frac0( a );
5375 aExp = extractFloat128Exp( a );
5376 aSign = extractFloat128Sign( a );
5377 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5378 shiftCount = aExp - 0x402F;
5379 if ( 0 < shiftCount ) {
5380 if ( 0x403E <= aExp ) {
5381 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5382 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5383 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5384 if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5385 }
5386 else {
5387 float_raise( float_flag_invalid STATUS_VAR);
5388 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5389 return LIT64( 0x7FFFFFFFFFFFFFFF );
5390 }
5391 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005392 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005393 }
5394 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005395 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard158142c2005-03-13 16:54:06 +00005396 STATUS(float_exception_flags) |= float_flag_inexact;
5397 }
5398 }
5399 else {
5400 if ( aExp < 0x3FFF ) {
5401 if ( aExp | aSig0 | aSig1 ) {
5402 STATUS(float_exception_flags) |= float_flag_inexact;
5403 }
5404 return 0;
5405 }
5406 z = aSig0>>( - shiftCount );
5407 if ( aSig1
Andreas Färberbb98fe42011-03-07 01:34:06 +01005408 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard158142c2005-03-13 16:54:06 +00005409 STATUS(float_exception_flags) |= float_flag_inexact;
5410 }
5411 }
5412 if ( aSign ) z = - z;
5413 return z;
5414
5415}
5416
5417/*----------------------------------------------------------------------------
5418| Returns the result of converting the quadruple-precision floating-point
5419| value `a' to the single-precision floating-point format. The conversion
5420| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5421| Arithmetic.
5422*----------------------------------------------------------------------------*/
5423
5424float32 float128_to_float32( float128 a STATUS_PARAM )
5425{
5426 flag aSign;
5427 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005428 uint64_t aSig0, aSig1;
5429 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00005430
5431 aSig1 = extractFloat128Frac1( a );
5432 aSig0 = extractFloat128Frac0( a );
5433 aExp = extractFloat128Exp( a );
5434 aSign = extractFloat128Sign( a );
5435 if ( aExp == 0x7FFF ) {
5436 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005437 return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005438 }
5439 return packFloat32( aSign, 0xFF, 0 );
5440 }
5441 aSig0 |= ( aSig1 != 0 );
5442 shift64RightJamming( aSig0, 18, &aSig0 );
5443 zSig = aSig0;
5444 if ( aExp || zSig ) {
5445 zSig |= 0x40000000;
5446 aExp -= 0x3F81;
5447 }
5448 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5449
5450}
5451
5452/*----------------------------------------------------------------------------
5453| Returns the result of converting the quadruple-precision floating-point
5454| value `a' to the double-precision floating-point format. The conversion
5455| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5456| Arithmetic.
5457*----------------------------------------------------------------------------*/
5458
5459float64 float128_to_float64( float128 a STATUS_PARAM )
5460{
5461 flag aSign;
5462 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005463 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005464
5465 aSig1 = extractFloat128Frac1( a );
5466 aSig0 = extractFloat128Frac0( a );
5467 aExp = extractFloat128Exp( a );
5468 aSign = extractFloat128Sign( a );
5469 if ( aExp == 0x7FFF ) {
5470 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005471 return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005472 }
5473 return packFloat64( aSign, 0x7FF, 0 );
5474 }
5475 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5476 aSig0 |= ( aSig1 != 0 );
5477 if ( aExp || aSig0 ) {
5478 aSig0 |= LIT64( 0x4000000000000000 );
5479 aExp -= 0x3C01;
5480 }
5481 return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5482
5483}
5484
bellard158142c2005-03-13 16:54:06 +00005485/*----------------------------------------------------------------------------
5486| Returns the result of converting the quadruple-precision floating-point
5487| value `a' to the extended double-precision floating-point format. The
5488| conversion is performed according to the IEC/IEEE Standard for Binary
5489| Floating-Point Arithmetic.
5490*----------------------------------------------------------------------------*/
5491
5492floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5493{
5494 flag aSign;
5495 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005496 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005497
5498 aSig1 = extractFloat128Frac1( a );
5499 aSig0 = extractFloat128Frac0( a );
5500 aExp = extractFloat128Exp( a );
5501 aSign = extractFloat128Sign( a );
5502 if ( aExp == 0x7FFF ) {
5503 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005504 return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005505 }
5506 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5507 }
5508 if ( aExp == 0 ) {
5509 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5510 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5511 }
5512 else {
5513 aSig0 |= LIT64( 0x0001000000000000 );
5514 }
5515 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5516 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5517
5518}
5519
bellard158142c2005-03-13 16:54:06 +00005520/*----------------------------------------------------------------------------
5521| Rounds the quadruple-precision floating-point value `a' to an integer, and
5522| returns the result as a quadruple-precision floating-point value. The
5523| operation is performed according to the IEC/IEEE Standard for Binary
5524| Floating-Point Arithmetic.
5525*----------------------------------------------------------------------------*/
5526
5527float128 float128_round_to_int( float128 a STATUS_PARAM )
5528{
5529 flag aSign;
5530 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005531 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00005532 int8 roundingMode;
5533 float128 z;
5534
5535 aExp = extractFloat128Exp( a );
5536 if ( 0x402F <= aExp ) {
5537 if ( 0x406F <= aExp ) {
5538 if ( ( aExp == 0x7FFF )
5539 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5540 ) {
5541 return propagateFloat128NaN( a, a STATUS_VAR );
5542 }
5543 return a;
5544 }
5545 lastBitMask = 1;
5546 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5547 roundBitsMask = lastBitMask - 1;
5548 z = a;
5549 roundingMode = STATUS(float_rounding_mode);
5550 if ( roundingMode == float_round_nearest_even ) {
5551 if ( lastBitMask ) {
5552 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5553 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5554 }
5555 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005556 if ( (int64_t) z.low < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005557 ++z.high;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005558 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard158142c2005-03-13 16:54:06 +00005559 }
5560 }
5561 }
5562 else if ( roundingMode != float_round_to_zero ) {
5563 if ( extractFloat128Sign( z )
5564 ^ ( roundingMode == float_round_up ) ) {
5565 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
5566 }
5567 }
5568 z.low &= ~ roundBitsMask;
5569 }
5570 else {
5571 if ( aExp < 0x3FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005572 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00005573 STATUS(float_exception_flags) |= float_flag_inexact;
5574 aSign = extractFloat128Sign( a );
5575 switch ( STATUS(float_rounding_mode) ) {
5576 case float_round_nearest_even:
5577 if ( ( aExp == 0x3FFE )
5578 && ( extractFloat128Frac0( a )
5579 | extractFloat128Frac1( a ) )
5580 ) {
5581 return packFloat128( aSign, 0x3FFF, 0, 0 );
5582 }
5583 break;
5584 case float_round_down:
5585 return
5586 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5587 : packFloat128( 0, 0, 0, 0 );
5588 case float_round_up:
5589 return
5590 aSign ? packFloat128( 1, 0, 0, 0 )
5591 : packFloat128( 0, 0x3FFF, 0, 0 );
5592 }
5593 return packFloat128( aSign, 0, 0, 0 );
5594 }
5595 lastBitMask = 1;
5596 lastBitMask <<= 0x402F - aExp;
5597 roundBitsMask = lastBitMask - 1;
5598 z.low = 0;
5599 z.high = a.high;
5600 roundingMode = STATUS(float_rounding_mode);
5601 if ( roundingMode == float_round_nearest_even ) {
5602 z.high += lastBitMask>>1;
5603 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5604 z.high &= ~ lastBitMask;
5605 }
5606 }
5607 else if ( roundingMode != float_round_to_zero ) {
5608 if ( extractFloat128Sign( z )
5609 ^ ( roundingMode == float_round_up ) ) {
5610 z.high |= ( a.low != 0 );
5611 z.high += roundBitsMask;
5612 }
5613 }
5614 z.high &= ~ roundBitsMask;
5615 }
5616 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
5617 STATUS(float_exception_flags) |= float_flag_inexact;
5618 }
5619 return z;
5620
5621}
5622
5623/*----------------------------------------------------------------------------
5624| Returns the result of adding the absolute values of the quadruple-precision
5625| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5626| before being returned. `zSign' is ignored if the result is a NaN.
5627| The addition is performed according to the IEC/IEEE Standard for Binary
5628| Floating-Point Arithmetic.
5629*----------------------------------------------------------------------------*/
5630
5631static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5632{
5633 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005634 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard158142c2005-03-13 16:54:06 +00005635 int32 expDiff;
5636
5637 aSig1 = extractFloat128Frac1( a );
5638 aSig0 = extractFloat128Frac0( a );
5639 aExp = extractFloat128Exp( a );
5640 bSig1 = extractFloat128Frac1( b );
5641 bSig0 = extractFloat128Frac0( b );
5642 bExp = extractFloat128Exp( b );
5643 expDiff = aExp - bExp;
5644 if ( 0 < expDiff ) {
5645 if ( aExp == 0x7FFF ) {
5646 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5647 return a;
5648 }
5649 if ( bExp == 0 ) {
5650 --expDiff;
5651 }
5652 else {
5653 bSig0 |= LIT64( 0x0001000000000000 );
5654 }
5655 shift128ExtraRightJamming(
5656 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5657 zExp = aExp;
5658 }
5659 else if ( expDiff < 0 ) {
5660 if ( bExp == 0x7FFF ) {
5661 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5662 return packFloat128( zSign, 0x7FFF, 0, 0 );
5663 }
5664 if ( aExp == 0 ) {
5665 ++expDiff;
5666 }
5667 else {
5668 aSig0 |= LIT64( 0x0001000000000000 );
5669 }
5670 shift128ExtraRightJamming(
5671 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5672 zExp = bExp;
5673 }
5674 else {
5675 if ( aExp == 0x7FFF ) {
5676 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5677 return propagateFloat128NaN( a, b STATUS_VAR );
5678 }
5679 return a;
5680 }
5681 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrookfe76d972008-12-19 14:33:59 +00005682 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01005683 if (STATUS(flush_to_zero)) {
5684 if (zSig0 | zSig1) {
5685 float_raise(float_flag_output_denormal STATUS_VAR);
5686 }
5687 return packFloat128(zSign, 0, 0, 0);
5688 }
pbrookfe76d972008-12-19 14:33:59 +00005689 return packFloat128( zSign, 0, zSig0, zSig1 );
5690 }
bellard158142c2005-03-13 16:54:06 +00005691 zSig2 = 0;
5692 zSig0 |= LIT64( 0x0002000000000000 );
5693 zExp = aExp;
5694 goto shiftRight1;
5695 }
5696 aSig0 |= LIT64( 0x0001000000000000 );
5697 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5698 --zExp;
5699 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
5700 ++zExp;
5701 shiftRight1:
5702 shift128ExtraRightJamming(
5703 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5704 roundAndPack:
5705 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5706
5707}
5708
5709/*----------------------------------------------------------------------------
5710| Returns the result of subtracting the absolute values of the quadruple-
5711| precision floating-point values `a' and `b'. If `zSign' is 1, the
5712| difference is negated before being returned. `zSign' is ignored if the
5713| result is a NaN. The subtraction is performed according to the IEC/IEEE
5714| Standard for Binary Floating-Point Arithmetic.
5715*----------------------------------------------------------------------------*/
5716
5717static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
5718{
5719 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005720 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00005721 int32 expDiff;
5722 float128 z;
5723
5724 aSig1 = extractFloat128Frac1( a );
5725 aSig0 = extractFloat128Frac0( a );
5726 aExp = extractFloat128Exp( a );
5727 bSig1 = extractFloat128Frac1( b );
5728 bSig0 = extractFloat128Frac0( b );
5729 bExp = extractFloat128Exp( b );
5730 expDiff = aExp - bExp;
5731 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5732 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
5733 if ( 0 < expDiff ) goto aExpBigger;
5734 if ( expDiff < 0 ) goto bExpBigger;
5735 if ( aExp == 0x7FFF ) {
5736 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
5737 return propagateFloat128NaN( a, b STATUS_VAR );
5738 }
5739 float_raise( float_flag_invalid STATUS_VAR);
5740 z.low = float128_default_nan_low;
5741 z.high = float128_default_nan_high;
5742 return z;
5743 }
5744 if ( aExp == 0 ) {
5745 aExp = 1;
5746 bExp = 1;
5747 }
5748 if ( bSig0 < aSig0 ) goto aBigger;
5749 if ( aSig0 < bSig0 ) goto bBigger;
5750 if ( bSig1 < aSig1 ) goto aBigger;
5751 if ( aSig1 < bSig1 ) goto bBigger;
5752 return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
5753 bExpBigger:
5754 if ( bExp == 0x7FFF ) {
5755 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5756 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
5757 }
5758 if ( aExp == 0 ) {
5759 ++expDiff;
5760 }
5761 else {
5762 aSig0 |= LIT64( 0x4000000000000000 );
5763 }
5764 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5765 bSig0 |= LIT64( 0x4000000000000000 );
5766 bBigger:
5767 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
5768 zExp = bExp;
5769 zSign ^= 1;
5770 goto normalizeRoundAndPack;
5771 aExpBigger:
5772 if ( aExp == 0x7FFF ) {
5773 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5774 return a;
5775 }
5776 if ( bExp == 0 ) {
5777 --expDiff;
5778 }
5779 else {
5780 bSig0 |= LIT64( 0x4000000000000000 );
5781 }
5782 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
5783 aSig0 |= LIT64( 0x4000000000000000 );
5784 aBigger:
5785 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
5786 zExp = aExp;
5787 normalizeRoundAndPack:
5788 --zExp;
5789 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
5790
5791}
5792
5793/*----------------------------------------------------------------------------
5794| Returns the result of adding the quadruple-precision floating-point values
5795| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
5796| for Binary Floating-Point Arithmetic.
5797*----------------------------------------------------------------------------*/
5798
5799float128 float128_add( float128 a, float128 b STATUS_PARAM )
5800{
5801 flag aSign, bSign;
5802
5803 aSign = extractFloat128Sign( a );
5804 bSign = extractFloat128Sign( b );
5805 if ( aSign == bSign ) {
5806 return addFloat128Sigs( a, b, aSign STATUS_VAR );
5807 }
5808 else {
5809 return subFloat128Sigs( a, b, aSign STATUS_VAR );
5810 }
5811
5812}
5813
5814/*----------------------------------------------------------------------------
5815| Returns the result of subtracting the quadruple-precision floating-point
5816| values `a' and `b'. The operation is performed according to the IEC/IEEE
5817| Standard for Binary Floating-Point Arithmetic.
5818*----------------------------------------------------------------------------*/
5819
5820float128 float128_sub( float128 a, float128 b STATUS_PARAM )
5821{
5822 flag aSign, bSign;
5823
5824 aSign = extractFloat128Sign( a );
5825 bSign = extractFloat128Sign( b );
5826 if ( aSign == bSign ) {
5827 return subFloat128Sigs( a, b, aSign STATUS_VAR );
5828 }
5829 else {
5830 return addFloat128Sigs( a, b, aSign STATUS_VAR );
5831 }
5832
5833}
5834
5835/*----------------------------------------------------------------------------
5836| Returns the result of multiplying the quadruple-precision floating-point
5837| values `a' and `b'. The operation is performed according to the IEC/IEEE
5838| Standard for Binary Floating-Point Arithmetic.
5839*----------------------------------------------------------------------------*/
5840
5841float128 float128_mul( float128 a, float128 b STATUS_PARAM )
5842{
5843 flag aSign, bSign, zSign;
5844 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005845 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard158142c2005-03-13 16:54:06 +00005846 float128 z;
5847
5848 aSig1 = extractFloat128Frac1( a );
5849 aSig0 = extractFloat128Frac0( a );
5850 aExp = extractFloat128Exp( a );
5851 aSign = extractFloat128Sign( a );
5852 bSig1 = extractFloat128Frac1( b );
5853 bSig0 = extractFloat128Frac0( b );
5854 bExp = extractFloat128Exp( b );
5855 bSign = extractFloat128Sign( b );
5856 zSign = aSign ^ bSign;
5857 if ( aExp == 0x7FFF ) {
5858 if ( ( aSig0 | aSig1 )
5859 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5860 return propagateFloat128NaN( a, b STATUS_VAR );
5861 }
5862 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
5863 return packFloat128( zSign, 0x7FFF, 0, 0 );
5864 }
5865 if ( bExp == 0x7FFF ) {
5866 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5867 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5868 invalid:
5869 float_raise( float_flag_invalid STATUS_VAR);
5870 z.low = float128_default_nan_low;
5871 z.high = float128_default_nan_high;
5872 return z;
5873 }
5874 return packFloat128( zSign, 0x7FFF, 0, 0 );
5875 }
5876 if ( aExp == 0 ) {
5877 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5878 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5879 }
5880 if ( bExp == 0 ) {
5881 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5882 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5883 }
5884 zExp = aExp + bExp - 0x4000;
5885 aSig0 |= LIT64( 0x0001000000000000 );
5886 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5887 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5888 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5889 zSig2 |= ( zSig3 != 0 );
5890 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5891 shift128ExtraRightJamming(
5892 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5893 ++zExp;
5894 }
5895 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5896
5897}
5898
5899/*----------------------------------------------------------------------------
5900| Returns the result of dividing the quadruple-precision floating-point value
5901| `a' by the corresponding value `b'. The operation is performed according to
5902| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5903*----------------------------------------------------------------------------*/
5904
5905float128 float128_div( float128 a, float128 b STATUS_PARAM )
5906{
5907 flag aSign, bSign, zSign;
5908 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005909 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5910 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00005911 float128 z;
5912
5913 aSig1 = extractFloat128Frac1( a );
5914 aSig0 = extractFloat128Frac0( a );
5915 aExp = extractFloat128Exp( a );
5916 aSign = extractFloat128Sign( a );
5917 bSig1 = extractFloat128Frac1( b );
5918 bSig0 = extractFloat128Frac0( b );
5919 bExp = extractFloat128Exp( b );
5920 bSign = extractFloat128Sign( b );
5921 zSign = aSign ^ bSign;
5922 if ( aExp == 0x7FFF ) {
5923 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5924 if ( bExp == 0x7FFF ) {
5925 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5926 goto invalid;
5927 }
5928 return packFloat128( zSign, 0x7FFF, 0, 0 );
5929 }
5930 if ( bExp == 0x7FFF ) {
5931 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
5932 return packFloat128( zSign, 0, 0, 0 );
5933 }
5934 if ( bExp == 0 ) {
5935 if ( ( bSig0 | bSig1 ) == 0 ) {
5936 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5937 invalid:
5938 float_raise( float_flag_invalid STATUS_VAR);
5939 z.low = float128_default_nan_low;
5940 z.high = float128_default_nan_high;
5941 return z;
5942 }
5943 float_raise( float_flag_divbyzero STATUS_VAR);
5944 return packFloat128( zSign, 0x7FFF, 0, 0 );
5945 }
5946 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5947 }
5948 if ( aExp == 0 ) {
5949 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5950 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5951 }
5952 zExp = aExp - bExp + 0x3FFD;
5953 shortShift128Left(
5954 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5955 shortShift128Left(
5956 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5957 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5958 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5959 ++zExp;
5960 }
5961 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5962 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5963 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005964 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005965 --zSig0;
5966 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5967 }
5968 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5969 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5970 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5971 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005972 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005973 --zSig1;
5974 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5975 }
5976 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5977 }
5978 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5979 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
5980
5981}
5982
5983/*----------------------------------------------------------------------------
5984| Returns the remainder of the quadruple-precision floating-point value `a'
5985| with respect to the corresponding value `b'. The operation is performed
5986| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5987*----------------------------------------------------------------------------*/
5988
5989float128 float128_rem( float128 a, float128 b STATUS_PARAM )
5990{
Blue Swirled086f32010-03-07 13:49:58 +00005991 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00005992 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005993 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5994 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
5995 int64_t sigMean0;
bellard158142c2005-03-13 16:54:06 +00005996 float128 z;
5997
5998 aSig1 = extractFloat128Frac1( a );
5999 aSig0 = extractFloat128Frac0( a );
6000 aExp = extractFloat128Exp( a );
6001 aSign = extractFloat128Sign( a );
6002 bSig1 = extractFloat128Frac1( b );
6003 bSig0 = extractFloat128Frac0( b );
6004 bExp = extractFloat128Exp( b );
bellard158142c2005-03-13 16:54:06 +00006005 if ( aExp == 0x7FFF ) {
6006 if ( ( aSig0 | aSig1 )
6007 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6008 return propagateFloat128NaN( a, b STATUS_VAR );
6009 }
6010 goto invalid;
6011 }
6012 if ( bExp == 0x7FFF ) {
6013 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6014 return a;
6015 }
6016 if ( bExp == 0 ) {
6017 if ( ( bSig0 | bSig1 ) == 0 ) {
6018 invalid:
6019 float_raise( float_flag_invalid STATUS_VAR);
6020 z.low = float128_default_nan_low;
6021 z.high = float128_default_nan_high;
6022 return z;
6023 }
6024 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6025 }
6026 if ( aExp == 0 ) {
6027 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6028 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6029 }
6030 expDiff = aExp - bExp;
6031 if ( expDiff < -1 ) return a;
6032 shortShift128Left(
6033 aSig0 | LIT64( 0x0001000000000000 ),
6034 aSig1,
6035 15 - ( expDiff < 0 ),
6036 &aSig0,
6037 &aSig1
6038 );
6039 shortShift128Left(
6040 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6041 q = le128( bSig0, bSig1, aSig0, aSig1 );
6042 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6043 expDiff -= 64;
6044 while ( 0 < expDiff ) {
6045 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6046 q = ( 4 < q ) ? q - 4 : 0;
6047 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6048 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6049 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6050 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6051 expDiff -= 61;
6052 }
6053 if ( -64 < expDiff ) {
6054 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6055 q = ( 4 < q ) ? q - 4 : 0;
6056 q >>= - expDiff;
6057 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6058 expDiff += 52;
6059 if ( expDiff < 0 ) {
6060 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6061 }
6062 else {
6063 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6064 }
6065 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6066 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6067 }
6068 else {
6069 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6070 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6071 }
6072 do {
6073 alternateASig0 = aSig0;
6074 alternateASig1 = aSig1;
6075 ++q;
6076 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006077 } while ( 0 <= (int64_t) aSig0 );
bellard158142c2005-03-13 16:54:06 +00006078 add128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01006079 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard158142c2005-03-13 16:54:06 +00006080 if ( ( sigMean0 < 0 )
6081 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6082 aSig0 = alternateASig0;
6083 aSig1 = alternateASig1;
6084 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01006085 zSign = ( (int64_t) aSig0 < 0 );
bellard158142c2005-03-13 16:54:06 +00006086 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6087 return
6088 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
6089
6090}
6091
6092/*----------------------------------------------------------------------------
6093| Returns the square root of the quadruple-precision floating-point value `a'.
6094| The operation is performed according to the IEC/IEEE Standard for Binary
6095| Floating-Point Arithmetic.
6096*----------------------------------------------------------------------------*/
6097
6098float128 float128_sqrt( float128 a STATUS_PARAM )
6099{
6100 flag aSign;
6101 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006102 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6103 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00006104 float128 z;
6105
6106 aSig1 = extractFloat128Frac1( a );
6107 aSig0 = extractFloat128Frac0( a );
6108 aExp = extractFloat128Exp( a );
6109 aSign = extractFloat128Sign( a );
6110 if ( aExp == 0x7FFF ) {
6111 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
6112 if ( ! aSign ) return a;
6113 goto invalid;
6114 }
6115 if ( aSign ) {
6116 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6117 invalid:
6118 float_raise( float_flag_invalid STATUS_VAR);
6119 z.low = float128_default_nan_low;
6120 z.high = float128_default_nan_high;
6121 return z;
6122 }
6123 if ( aExp == 0 ) {
6124 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6125 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6126 }
6127 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6128 aSig0 |= LIT64( 0x0001000000000000 );
6129 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6130 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6131 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6132 doubleZSig0 = zSig0<<1;
6133 mul64To128( zSig0, zSig0, &term0, &term1 );
6134 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006135 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006136 --zSig0;
6137 doubleZSig0 -= 2;
6138 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6139 }
6140 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6141 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6142 if ( zSig1 == 0 ) zSig1 = 1;
6143 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6144 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6145 mul64To128( zSig1, zSig1, &term2, &term3 );
6146 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006147 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006148 --zSig1;
6149 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6150 term3 |= 1;
6151 term2 |= doubleZSig0;
6152 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6153 }
6154 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6155 }
6156 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6157 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6158
6159}
6160
6161/*----------------------------------------------------------------------------
6162| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02006163| the corresponding value `b', and 0 otherwise. The invalid exception is
6164| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00006165| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6166*----------------------------------------------------------------------------*/
6167
Aurelien Jarnob6893622011-04-14 00:49:29 +02006168int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006169{
6170
6171 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6172 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6173 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6174 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6175 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006176 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00006177 return 0;
6178 }
6179 return
6180 ( a.low == b.low )
6181 && ( ( a.high == b.high )
6182 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006183 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006184 );
6185
6186}
6187
6188/*----------------------------------------------------------------------------
6189| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006190| or equal to the corresponding value `b', and 0 otherwise. The invalid
6191| exception is raised if either operand is a NaN. The comparison is performed
6192| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006193*----------------------------------------------------------------------------*/
6194
bellard750afe92006-10-28 19:27:11 +00006195int float128_le( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006196{
6197 flag aSign, bSign;
6198
6199 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6200 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6201 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6202 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6203 ) {
6204 float_raise( float_flag_invalid STATUS_VAR);
6205 return 0;
6206 }
6207 aSign = extractFloat128Sign( a );
6208 bSign = extractFloat128Sign( b );
6209 if ( aSign != bSign ) {
6210 return
6211 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006212 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006213 == 0 );
6214 }
6215 return
6216 aSign ? le128( b.high, b.low, a.high, a.low )
6217 : le128( a.high, a.low, b.high, b.low );
6218
6219}
6220
6221/*----------------------------------------------------------------------------
6222| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006223| the corresponding value `b', and 0 otherwise. The invalid exception is
6224| raised if either operand is a NaN. The comparison is performed according
6225| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006226*----------------------------------------------------------------------------*/
6227
bellard750afe92006-10-28 19:27:11 +00006228int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006229{
6230 flag aSign, bSign;
6231
6232 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6233 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6234 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6235 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6236 ) {
6237 float_raise( float_flag_invalid STATUS_VAR);
6238 return 0;
6239 }
6240 aSign = extractFloat128Sign( a );
6241 bSign = extractFloat128Sign( b );
6242 if ( aSign != bSign ) {
6243 return
6244 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006245 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006246 != 0 );
6247 }
6248 return
6249 aSign ? lt128( b.high, b.low, a.high, a.low )
6250 : lt128( a.high, a.low, b.high, b.low );
6251
6252}
6253
6254/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02006255| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006256| be compared, and 0 otherwise. The invalid exception is raised if either
6257| operand is a NaN. The comparison is performed according to the IEC/IEEE
6258| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02006259*----------------------------------------------------------------------------*/
6260
6261int float128_unordered( float128 a, float128 b STATUS_PARAM )
6262{
6263 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6264 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6265 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6266 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6267 ) {
6268 float_raise( float_flag_invalid STATUS_VAR);
6269 return 1;
6270 }
6271 return 0;
6272}
6273
6274/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00006275| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006276| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6277| exception. The comparison is performed according to the IEC/IEEE Standard
6278| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006279*----------------------------------------------------------------------------*/
6280
Aurelien Jarnob6893622011-04-14 00:49:29 +02006281int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006282{
6283
6284 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6285 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6286 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6287 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6288 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006289 if ( float128_is_signaling_nan( a )
6290 || float128_is_signaling_nan( b ) ) {
6291 float_raise( float_flag_invalid STATUS_VAR);
6292 }
bellard158142c2005-03-13 16:54:06 +00006293 return 0;
6294 }
6295 return
6296 ( a.low == b.low )
6297 && ( ( a.high == b.high )
6298 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006299 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006300 );
6301
6302}
6303
6304/*----------------------------------------------------------------------------
6305| Returns 1 if the quadruple-precision floating-point value `a' is less than
6306| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6307| cause an exception. Otherwise, the comparison is performed according to the
6308| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6309*----------------------------------------------------------------------------*/
6310
bellard750afe92006-10-28 19:27:11 +00006311int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006312{
6313 flag aSign, bSign;
6314
6315 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6316 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6317 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6318 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6319 ) {
6320 if ( float128_is_signaling_nan( a )
6321 || float128_is_signaling_nan( b ) ) {
6322 float_raise( float_flag_invalid STATUS_VAR);
6323 }
6324 return 0;
6325 }
6326 aSign = extractFloat128Sign( a );
6327 bSign = extractFloat128Sign( b );
6328 if ( aSign != bSign ) {
6329 return
6330 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006331 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006332 == 0 );
6333 }
6334 return
6335 aSign ? le128( b.high, b.low, a.high, a.low )
6336 : le128( a.high, a.low, b.high, b.low );
6337
6338}
6339
6340/*----------------------------------------------------------------------------
6341| Returns 1 if the quadruple-precision floating-point value `a' is less than
6342| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6343| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6344| Standard for Binary Floating-Point Arithmetic.
6345*----------------------------------------------------------------------------*/
6346
bellard750afe92006-10-28 19:27:11 +00006347int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006348{
6349 flag aSign, bSign;
6350
6351 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6352 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6353 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6354 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6355 ) {
6356 if ( float128_is_signaling_nan( a )
6357 || float128_is_signaling_nan( b ) ) {
6358 float_raise( float_flag_invalid STATUS_VAR);
6359 }
6360 return 0;
6361 }
6362 aSign = extractFloat128Sign( a );
6363 bSign = extractFloat128Sign( b );
6364 if ( aSign != bSign ) {
6365 return
6366 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006367 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006368 != 0 );
6369 }
6370 return
6371 aSign ? lt128( b.high, b.low, a.high, a.low )
6372 : lt128( a.high, a.low, b.high, b.low );
6373
6374}
6375
Aurelien Jarno67b78612011-04-14 00:49:29 +02006376/*----------------------------------------------------------------------------
6377| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6378| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6379| comparison is performed according to the IEC/IEEE Standard for Binary
6380| Floating-Point Arithmetic.
6381*----------------------------------------------------------------------------*/
6382
6383int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6384{
6385 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6386 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6387 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6388 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6389 ) {
6390 if ( float128_is_signaling_nan( a )
6391 || float128_is_signaling_nan( b ) ) {
6392 float_raise( float_flag_invalid STATUS_VAR);
6393 }
6394 return 1;
6395 }
6396 return 0;
6397}
6398
bellard1d6bda32005-03-13 18:52:29 +00006399/* misc functions */
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006400float32 uint32_to_float32( uint32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006401{
6402 return int64_to_float32(a STATUS_VAR);
6403}
6404
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006405float64 uint32_to_float64( uint32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006406{
6407 return int64_to_float64(a STATUS_VAR);
6408}
6409
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006410uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006411{
6412 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006413 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006414
6415 v = float32_to_int64(a STATUS_VAR);
6416 if (v < 0) {
6417 res = 0;
6418 float_raise( float_flag_invalid STATUS_VAR);
6419 } else if (v > 0xffffffff) {
6420 res = 0xffffffff;
6421 float_raise( float_flag_invalid STATUS_VAR);
6422 } else {
6423 res = v;
6424 }
6425 return res;
6426}
6427
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006428uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006429{
6430 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006431 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006432
6433 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6434 if (v < 0) {
6435 res = 0;
6436 float_raise( float_flag_invalid STATUS_VAR);
6437 } else if (v > 0xffffffff) {
6438 res = 0xffffffff;
6439 float_raise( float_flag_invalid STATUS_VAR);
6440 } else {
6441 res = v;
6442 }
6443 return res;
6444}
6445
Andreas Färber38641f82011-08-28 20:24:33 +02006446uint16 float32_to_uint16_round_to_zero( float32 a STATUS_PARAM )
Peter Maydellcbcef452010-12-07 15:37:34 +00006447{
6448 int64_t v;
Andreas Färber38641f82011-08-28 20:24:33 +02006449 uint16 res;
Peter Maydellcbcef452010-12-07 15:37:34 +00006450
6451 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6452 if (v < 0) {
6453 res = 0;
6454 float_raise( float_flag_invalid STATUS_VAR);
6455 } else if (v > 0xffff) {
6456 res = 0xffff;
6457 float_raise( float_flag_invalid STATUS_VAR);
6458 } else {
6459 res = v;
6460 }
6461 return res;
6462}
6463
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006464uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006465{
6466 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006467 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006468
6469 v = float64_to_int64(a STATUS_VAR);
6470 if (v < 0) {
6471 res = 0;
6472 float_raise( float_flag_invalid STATUS_VAR);
6473 } else if (v > 0xffffffff) {
6474 res = 0xffffffff;
6475 float_raise( float_flag_invalid STATUS_VAR);
6476 } else {
6477 res = v;
6478 }
6479 return res;
6480}
6481
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006482uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006483{
6484 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006485 uint32 res;
bellard1d6bda32005-03-13 18:52:29 +00006486
6487 v = float64_to_int64_round_to_zero(a STATUS_VAR);
6488 if (v < 0) {
6489 res = 0;
6490 float_raise( float_flag_invalid STATUS_VAR);
6491 } else if (v > 0xffffffff) {
6492 res = 0xffffffff;
6493 float_raise( float_flag_invalid STATUS_VAR);
6494 } else {
6495 res = v;
6496 }
6497 return res;
6498}
6499
Andreas Färber38641f82011-08-28 20:24:33 +02006500uint16 float64_to_uint16_round_to_zero( float64 a STATUS_PARAM )
Peter Maydellcbcef452010-12-07 15:37:34 +00006501{
6502 int64_t v;
Andreas Färber38641f82011-08-28 20:24:33 +02006503 uint16 res;
Peter Maydellcbcef452010-12-07 15:37:34 +00006504
6505 v = float64_to_int64_round_to_zero(a STATUS_VAR);
6506 if (v < 0) {
6507 res = 0;
6508 float_raise( float_flag_invalid STATUS_VAR);
6509 } else if (v > 0xffff) {
6510 res = 0xffff;
6511 float_raise( float_flag_invalid STATUS_VAR);
6512 } else {
6513 res = v;
6514 }
6515 return res;
6516}
6517
pbrookf090c9d2007-11-18 14:33:24 +00006518/* FIXME: This looks broken. */
j_mayer75d62a52007-03-20 22:10:42 +00006519uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
6520{
6521 int64_t v;
6522
pbrookf090c9d2007-11-18 14:33:24 +00006523 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6524 v += float64_val(a);
6525 v = float64_to_int64(make_float64(v) STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00006526
6527 return v - INT64_MIN;
6528}
6529
6530uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
6531{
6532 int64_t v;
6533
pbrookf090c9d2007-11-18 14:33:24 +00006534 v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
6535 v += float64_val(a);
6536 v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00006537
6538 return v - INT64_MIN;
6539}
6540
bellard1d6bda32005-03-13 18:52:29 +00006541#define COMPARE(s, nan_exp) \
bellard750afe92006-10-28 19:27:11 +00006542INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard1d6bda32005-03-13 18:52:29 +00006543 int is_quiet STATUS_PARAM ) \
6544{ \
6545 flag aSign, bSign; \
Andreas Färberbb98fe42011-03-07 01:34:06 +01006546 uint ## s ## _t av, bv; \
Peter Maydell37d18662011-01-06 19:37:53 +00006547 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6548 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard1d6bda32005-03-13 18:52:29 +00006549 \
6550 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
6551 extractFloat ## s ## Frac( a ) ) || \
6552 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
6553 extractFloat ## s ## Frac( b ) )) { \
6554 if (!is_quiet || \
6555 float ## s ## _is_signaling_nan( a ) || \
6556 float ## s ## _is_signaling_nan( b ) ) { \
6557 float_raise( float_flag_invalid STATUS_VAR); \
6558 } \
6559 return float_relation_unordered; \
6560 } \
6561 aSign = extractFloat ## s ## Sign( a ); \
6562 bSign = extractFloat ## s ## Sign( b ); \
pbrookf090c9d2007-11-18 14:33:24 +00006563 av = float ## s ## _val(a); \
blueswir1cd8a2532007-11-21 18:57:44 +00006564 bv = float ## s ## _val(b); \
bellard1d6bda32005-03-13 18:52:29 +00006565 if ( aSign != bSign ) { \
Andreas Färberbb98fe42011-03-07 01:34:06 +01006566 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
bellard1d6bda32005-03-13 18:52:29 +00006567 /* zero case */ \
6568 return float_relation_equal; \
6569 } else { \
6570 return 1 - (2 * aSign); \
6571 } \
6572 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00006573 if (av == bv) { \
bellard1d6bda32005-03-13 18:52:29 +00006574 return float_relation_equal; \
6575 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00006576 return 1 - 2 * (aSign ^ ( av < bv )); \
bellard1d6bda32005-03-13 18:52:29 +00006577 } \
6578 } \
6579} \
6580 \
bellard750afe92006-10-28 19:27:11 +00006581int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00006582{ \
6583 return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
6584} \
6585 \
bellard750afe92006-10-28 19:27:11 +00006586int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00006587{ \
6588 return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
6589}
6590
6591COMPARE(32, 0xff)
6592COMPARE(64, 0x7ff)
pbrook9ee6e8b2007-11-11 00:04:49 +00006593
Aurelien Jarnof6714d32011-04-20 13:04:22 +02006594INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
6595 int is_quiet STATUS_PARAM )
6596{
6597 flag aSign, bSign;
6598
6599 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6600 ( extractFloatx80Frac( a )<<1 ) ) ||
6601 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6602 ( extractFloatx80Frac( b )<<1 ) )) {
6603 if (!is_quiet ||
6604 floatx80_is_signaling_nan( a ) ||
6605 floatx80_is_signaling_nan( b ) ) {
6606 float_raise( float_flag_invalid STATUS_VAR);
6607 }
6608 return float_relation_unordered;
6609 }
6610 aSign = extractFloatx80Sign( a );
6611 bSign = extractFloatx80Sign( b );
6612 if ( aSign != bSign ) {
6613
6614 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6615 ( ( a.low | b.low ) == 0 ) ) {
6616 /* zero case */
6617 return float_relation_equal;
6618 } else {
6619 return 1 - (2 * aSign);
6620 }
6621 } else {
6622 if (a.low == b.low && a.high == b.high) {
6623 return float_relation_equal;
6624 } else {
6625 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6626 }
6627 }
6628}
6629
6630int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
6631{
6632 return floatx80_compare_internal(a, b, 0 STATUS_VAR);
6633}
6634
6635int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
6636{
6637 return floatx80_compare_internal(a, b, 1 STATUS_VAR);
6638}
6639
blueswir11f587322007-11-25 18:40:20 +00006640INLINE int float128_compare_internal( float128 a, float128 b,
6641 int is_quiet STATUS_PARAM )
6642{
6643 flag aSign, bSign;
6644
6645 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6646 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6647 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6648 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6649 if (!is_quiet ||
6650 float128_is_signaling_nan( a ) ||
6651 float128_is_signaling_nan( b ) ) {
6652 float_raise( float_flag_invalid STATUS_VAR);
6653 }
6654 return float_relation_unordered;
6655 }
6656 aSign = extractFloat128Sign( a );
6657 bSign = extractFloat128Sign( b );
6658 if ( aSign != bSign ) {
6659 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6660 /* zero case */
6661 return float_relation_equal;
6662 } else {
6663 return 1 - (2 * aSign);
6664 }
6665 } else {
6666 if (a.low == b.low && a.high == b.high) {
6667 return float_relation_equal;
6668 } else {
6669 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6670 }
6671 }
6672}
6673
6674int float128_compare( float128 a, float128 b STATUS_PARAM )
6675{
6676 return float128_compare_internal(a, b, 0 STATUS_VAR);
6677}
6678
6679int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
6680{
6681 return float128_compare_internal(a, b, 1 STATUS_VAR);
6682}
6683
Peter Maydell274f1b02011-03-11 08:12:25 +00006684/* min() and max() functions. These can't be implemented as
6685 * 'compare and pick one input' because that would mishandle
6686 * NaNs and +0 vs -0.
6687 */
6688#define MINMAX(s, nan_exp) \
6689INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
6690 int ismin STATUS_PARAM ) \
6691{ \
6692 flag aSign, bSign; \
6693 uint ## s ## _t av, bv; \
6694 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
6695 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
6696 if (float ## s ## _is_any_nan(a) || \
6697 float ## s ## _is_any_nan(b)) { \
6698 return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
6699 } \
6700 aSign = extractFloat ## s ## Sign(a); \
6701 bSign = extractFloat ## s ## Sign(b); \
6702 av = float ## s ## _val(a); \
6703 bv = float ## s ## _val(b); \
6704 if (aSign != bSign) { \
6705 if (ismin) { \
6706 return aSign ? a : b; \
6707 } else { \
6708 return aSign ? b : a; \
6709 } \
6710 } else { \
6711 if (ismin) { \
6712 return (aSign ^ (av < bv)) ? a : b; \
6713 } else { \
6714 return (aSign ^ (av < bv)) ? b : a; \
6715 } \
6716 } \
6717} \
6718 \
6719float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
6720{ \
6721 return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
6722} \
6723 \
6724float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
6725{ \
6726 return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
6727}
6728
6729MINMAX(32, 0xff)
6730MINMAX(64, 0x7ff)
6731
6732
pbrook9ee6e8b2007-11-11 00:04:49 +00006733/* Multiply A by 2 raised to the power N. */
6734float32 float32_scalbn( float32 a, int n STATUS_PARAM )
6735{
6736 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006737 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006738 uint32_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006739
Peter Maydell37d18662011-01-06 19:37:53 +00006740 a = float32_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00006741 aSig = extractFloat32Frac( a );
6742 aExp = extractFloat32Exp( a );
6743 aSign = extractFloat32Sign( a );
6744
6745 if ( aExp == 0xFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006746 if ( aSig ) {
6747 return propagateFloat32NaN( a, a STATUS_VAR );
6748 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006749 return a;
6750 }
pbrook69397542008-12-19 12:59:28 +00006751 if ( aExp != 0 )
6752 aSig |= 0x00800000;
6753 else if ( aSig == 0 )
6754 return a;
6755
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006756 if (n > 0x200) {
6757 n = 0x200;
6758 } else if (n < -0x200) {
6759 n = -0x200;
6760 }
6761
pbrook69397542008-12-19 12:59:28 +00006762 aExp += n - 1;
6763 aSig <<= 7;
6764 return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006765}
6766
6767float64 float64_scalbn( float64 a, int n STATUS_PARAM )
6768{
6769 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006770 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006771 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006772
Peter Maydell37d18662011-01-06 19:37:53 +00006773 a = float64_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00006774 aSig = extractFloat64Frac( a );
6775 aExp = extractFloat64Exp( a );
6776 aSign = extractFloat64Sign( a );
6777
6778 if ( aExp == 0x7FF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006779 if ( aSig ) {
6780 return propagateFloat64NaN( a, a STATUS_VAR );
6781 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006782 return a;
6783 }
pbrook69397542008-12-19 12:59:28 +00006784 if ( aExp != 0 )
6785 aSig |= LIT64( 0x0010000000000000 );
6786 else if ( aSig == 0 )
6787 return a;
6788
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006789 if (n > 0x1000) {
6790 n = 0x1000;
6791 } else if (n < -0x1000) {
6792 n = -0x1000;
6793 }
6794
pbrook69397542008-12-19 12:59:28 +00006795 aExp += n - 1;
6796 aSig <<= 10;
6797 return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006798}
6799
pbrook9ee6e8b2007-11-11 00:04:49 +00006800floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
6801{
6802 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006803 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006804 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00006805
6806 aSig = extractFloatx80Frac( a );
6807 aExp = extractFloatx80Exp( a );
6808 aSign = extractFloatx80Sign( a );
6809
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006810 if ( aExp == 0x7FFF ) {
6811 if ( aSig<<1 ) {
6812 return propagateFloatx80NaN( a, a STATUS_VAR );
6813 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006814 return a;
6815 }
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006816
pbrook69397542008-12-19 12:59:28 +00006817 if (aExp == 0 && aSig == 0)
6818 return a;
6819
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006820 if (n > 0x10000) {
6821 n = 0x10000;
6822 } else if (n < -0x10000) {
6823 n = -0x10000;
6824 }
6825
pbrook9ee6e8b2007-11-11 00:04:49 +00006826 aExp += n;
pbrook69397542008-12-19 12:59:28 +00006827 return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
6828 aSign, aExp, aSig, 0 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006829}
pbrook9ee6e8b2007-11-11 00:04:49 +00006830
pbrook9ee6e8b2007-11-11 00:04:49 +00006831float128 float128_scalbn( float128 a, int n STATUS_PARAM )
6832{
6833 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006834 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006835 uint64_t aSig0, aSig1;
pbrook9ee6e8b2007-11-11 00:04:49 +00006836
6837 aSig1 = extractFloat128Frac1( a );
6838 aSig0 = extractFloat128Frac0( a );
6839 aExp = extractFloat128Exp( a );
6840 aSign = extractFloat128Sign( a );
6841 if ( aExp == 0x7FFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006842 if ( aSig0 | aSig1 ) {
6843 return propagateFloat128NaN( a, a STATUS_VAR );
6844 }
pbrook9ee6e8b2007-11-11 00:04:49 +00006845 return a;
6846 }
pbrook69397542008-12-19 12:59:28 +00006847 if ( aExp != 0 )
6848 aSig0 |= LIT64( 0x0001000000000000 );
6849 else if ( aSig0 == 0 && aSig1 == 0 )
6850 return a;
6851
Aurelien Jarno326b9e92011-04-20 13:04:22 +02006852 if (n > 0x10000) {
6853 n = 0x10000;
6854 } else if (n < -0x10000) {
6855 n = -0x10000;
6856 }
6857
pbrook69397542008-12-19 12:59:28 +00006858 aExp += n - 1;
6859 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
6860 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00006861
6862}