blob: 9274ebf1011cd369645eeacccbe02de49116864a [file] [log] [blame]
Andreas Färber8d725fa2011-03-07 01:34:04 +01001/*
2 * QEMU float support
3 *
4 * Derived from SoftFloat.
5 */
bellard158142c2005-03-13 16:54:06 +00006
7/*============================================================================
8
9This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
10Package, Release 2b.
11
12Written by John R. Hauser. This work was made possible in part by the
13International Computer Science Institute, located at Suite 600, 1947 Center
14Street, Berkeley, California 94704. Funding was partially provided by the
15National Science Foundation under grant MIP-9311980. The original version
16of this code was written as part of a project to build a fixed-point vector
17processor in collaboration with the University of California at Berkeley,
18overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
20arithmetic/SoftFloat.html'.
21
22THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
23been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
24RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
25AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
26COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
27EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
28INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
29OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
30
31Derivative works are acceptable, even for commercial purposes, so long as
32(1) the source code for the derivative work includes prominent notice that
33the work is derivative, and (2) the source code includes prominent notice with
34these four paragraphs for those parts of this code that are retained.
35
36=============================================================================*/
37
Peter Maydell2ac8bd02011-09-26 16:56:55 +010038/* softfloat (and in particular the code in softfloat-specialize.h) is
39 * target-dependent and needs the TARGET_* macros.
40 */
41#include "config.h"
42
Paolo Bonzini6b4c3052012-10-24 13:12:00 +020043#include "fpu/softfloat.h"
bellard158142c2005-03-13 16:54:06 +000044
Peter Maydelldc355b72014-01-07 17:19:12 +000045/* We only need stdlib for abort() */
46#include <stdlib.h>
47
bellard158142c2005-03-13 16:54:06 +000048/*----------------------------------------------------------------------------
49| Primitive arithmetic functions, including multi-word arithmetic, and
50| division and square root approximations. (Can be specialized to target if
51| desired.)
52*----------------------------------------------------------------------------*/
53#include "softfloat-macros.h"
54
55/*----------------------------------------------------------------------------
56| Functions and definitions to determine: (1) whether tininess for underflow
57| is detected before or after rounding by default, (2) what (if anything)
58| happens when exceptions are raised, (3) how signaling NaNs are distinguished
59| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
60| are propagated from function inputs to output. These details are target-
61| specific.
62*----------------------------------------------------------------------------*/
63#include "softfloat-specialize.h"
64
bellard158142c2005-03-13 16:54:06 +000065/*----------------------------------------------------------------------------
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000066| Returns the fraction bits of the half-precision floating-point value `a'.
67*----------------------------------------------------------------------------*/
68
Luiz Capitulinoa49db982014-06-19 10:13:43 -040069static inline uint32_t extractFloat16Frac(float16 a)
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000070{
71 return float16_val(a) & 0x3ff;
72}
73
74/*----------------------------------------------------------------------------
75| Returns the exponent bits of the half-precision floating-point value `a'.
76*----------------------------------------------------------------------------*/
77
Luiz Capitulinoa49db982014-06-19 10:13:43 -040078static inline int_fast16_t extractFloat16Exp(float16 a)
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000079{
80 return (float16_val(a) >> 10) & 0x1f;
81}
82
83/*----------------------------------------------------------------------------
| Returns the sign bit of the half-precision floating-point value `a'.
85*----------------------------------------------------------------------------*/
86
Luiz Capitulinoa49db982014-06-19 10:13:43 -040087static inline flag extractFloat16Sign(float16 a)
Peter Maydellbb4d4bb2011-02-10 11:28:56 +000088{
89 return float16_val(a)>>15;
90}
91
92/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +000093| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
94| and 7, and returns the properly rounded 32-bit integer corresponding to the
95| input. If `zSign' is 1, the input is negated before being converted to an
96| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
97| is simply rounded to an integer, with the inexact exception raised if the
98| input cannot be represented exactly as an integer. However, if the fixed-
99| point input is too large, the invalid exception is raised and the largest
100| positive or negative integer is returned.
101*----------------------------------------------------------------------------*/
102
Andreas Färberbb98fe42011-03-07 01:34:06 +0100103static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000104{
105 int8 roundingMode;
106 flag roundNearestEven;
107 int8 roundIncrement, roundBits;
Peter Maydell760e1412012-04-05 19:12:35 +0100108 int32_t z;
bellard158142c2005-03-13 16:54:06 +0000109
110 roundingMode = STATUS(float_rounding_mode);
111 roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydelldc355b72014-01-07 17:19:12 +0000112 switch (roundingMode) {
113 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000114 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000115 roundIncrement = 0x40;
116 break;
117 case float_round_to_zero:
118 roundIncrement = 0;
119 break;
120 case float_round_up:
121 roundIncrement = zSign ? 0 : 0x7f;
122 break;
123 case float_round_down:
124 roundIncrement = zSign ? 0x7f : 0;
125 break;
126 default:
127 abort();
bellard158142c2005-03-13 16:54:06 +0000128 }
129 roundBits = absZ & 0x7F;
130 absZ = ( absZ + roundIncrement )>>7;
131 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
132 z = absZ;
133 if ( zSign ) z = - z;
134 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
135 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +0100136 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +0000137 }
138 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
139 return z;
140
141}
142
143/*----------------------------------------------------------------------------
144| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
145| `absZ1', with binary point between bits 63 and 64 (between the input words),
146| and returns the properly rounded 64-bit integer corresponding to the input.
147| If `zSign' is 1, the input is negated before being converted to an integer.
148| Ordinarily, the fixed-point input is simply rounded to an integer, with
149| the inexact exception raised if the input cannot be represented exactly as
150| an integer. However, if the fixed-point input is too large, the invalid
151| exception is raised and the largest positive or negative integer is
152| returned.
153*----------------------------------------------------------------------------*/
154
Andreas Färberbb98fe42011-03-07 01:34:06 +0100155static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000156{
157 int8 roundingMode;
158 flag roundNearestEven, increment;
Peter Maydell760e1412012-04-05 19:12:35 +0100159 int64_t z;
bellard158142c2005-03-13 16:54:06 +0000160
161 roundingMode = STATUS(float_rounding_mode);
162 roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydelldc355b72014-01-07 17:19:12 +0000163 switch (roundingMode) {
164 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000165 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000166 increment = ((int64_t) absZ1 < 0);
167 break;
168 case float_round_to_zero:
169 increment = 0;
170 break;
171 case float_round_up:
172 increment = !zSign && absZ1;
173 break;
174 case float_round_down:
175 increment = zSign && absZ1;
176 break;
177 default:
178 abort();
bellard158142c2005-03-13 16:54:06 +0000179 }
180 if ( increment ) {
181 ++absZ0;
182 if ( absZ0 == 0 ) goto overflow;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100183 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000184 }
185 z = absZ0;
186 if ( zSign ) z = - z;
187 if ( z && ( ( z < 0 ) ^ zSign ) ) {
188 overflow:
189 float_raise( float_flag_invalid STATUS_VAR);
190 return
Andreas Färberbb98fe42011-03-07 01:34:06 +0100191 zSign ? (int64_t) LIT64( 0x8000000000000000 )
bellard158142c2005-03-13 16:54:06 +0000192 : LIT64( 0x7FFFFFFFFFFFFFFF );
193 }
194 if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
195 return z;
196
197}
198
199/*----------------------------------------------------------------------------
Tom Mustafb3ea832014-01-07 17:17:49 +0000200| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
201| `absZ1', with binary point between bits 63 and 64 (between the input words),
202| and returns the properly rounded 64-bit unsigned integer corresponding to the
203| input. Ordinarily, the fixed-point input is simply rounded to an integer,
204| with the inexact exception raised if the input cannot be represented exactly
205| as an integer. However, if the fixed-point input is too large, the invalid
206| exception is raised and the largest unsigned integer is returned.
207*----------------------------------------------------------------------------*/
208
209static int64 roundAndPackUint64(flag zSign, uint64_t absZ0,
210 uint64_t absZ1 STATUS_PARAM)
211{
212 int8 roundingMode;
213 flag roundNearestEven, increment;
214
215 roundingMode = STATUS(float_rounding_mode);
216 roundNearestEven = (roundingMode == float_round_nearest_even);
Peter Maydelldc355b72014-01-07 17:19:12 +0000217 switch (roundingMode) {
218 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000219 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000220 increment = ((int64_t)absZ1 < 0);
221 break;
222 case float_round_to_zero:
223 increment = 0;
224 break;
225 case float_round_up:
226 increment = !zSign && absZ1;
227 break;
228 case float_round_down:
229 increment = zSign && absZ1;
230 break;
231 default:
232 abort();
Tom Mustafb3ea832014-01-07 17:17:49 +0000233 }
234 if (increment) {
235 ++absZ0;
236 if (absZ0 == 0) {
237 float_raise(float_flag_invalid STATUS_VAR);
238 return LIT64(0xFFFFFFFFFFFFFFFF);
239 }
240 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
241 }
242
243 if (zSign && absZ0) {
244 float_raise(float_flag_invalid STATUS_VAR);
245 return 0;
246 }
247
248 if (absZ1) {
249 STATUS(float_exception_flags) |= float_flag_inexact;
250 }
251 return absZ0;
252}
253
254/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000255| Returns the fraction bits of the single-precision floating-point value `a'.
256*----------------------------------------------------------------------------*/
257
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400258static inline uint32_t extractFloat32Frac( float32 a )
bellard158142c2005-03-13 16:54:06 +0000259{
260
pbrookf090c9d2007-11-18 14:33:24 +0000261 return float32_val(a) & 0x007FFFFF;
bellard158142c2005-03-13 16:54:06 +0000262
263}
264
265/*----------------------------------------------------------------------------
266| Returns the exponent bits of the single-precision floating-point value `a'.
267*----------------------------------------------------------------------------*/
268
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400269static inline int_fast16_t extractFloat32Exp(float32 a)
bellard158142c2005-03-13 16:54:06 +0000270{
271
pbrookf090c9d2007-11-18 14:33:24 +0000272 return ( float32_val(a)>>23 ) & 0xFF;
bellard158142c2005-03-13 16:54:06 +0000273
274}
275
276/*----------------------------------------------------------------------------
277| Returns the sign bit of the single-precision floating-point value `a'.
278*----------------------------------------------------------------------------*/
279
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400280static inline flag extractFloat32Sign( float32 a )
bellard158142c2005-03-13 16:54:06 +0000281{
282
pbrookf090c9d2007-11-18 14:33:24 +0000283 return float32_val(a)>>31;
bellard158142c2005-03-13 16:54:06 +0000284
285}
286
287/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000288| If `a' is denormal and we are in flush-to-zero mode then set the
289| input-denormal exception and return zero. Otherwise just return the value.
290*----------------------------------------------------------------------------*/
Alex Bennée7baeabc2014-03-17 16:31:51 +0000291float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
Peter Maydell37d18662011-01-06 19:37:53 +0000292{
293 if (STATUS(flush_inputs_to_zero)) {
294 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
295 float_raise(float_flag_input_denormal STATUS_VAR);
296 return make_float32(float32_val(a) & 0x80000000);
297 }
298 }
299 return a;
300}
301
302/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000303| Normalizes the subnormal single-precision floating-point value represented
304| by the denormalized significand `aSig'. The normalized exponent and
305| significand are stored at the locations pointed to by `zExpPtr' and
306| `zSigPtr', respectively.
307*----------------------------------------------------------------------------*/
308
309static void
Andreas Färber94a49d82012-04-26 00:15:56 +0200310 normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, uint32_t *zSigPtr)
bellard158142c2005-03-13 16:54:06 +0000311{
312 int8 shiftCount;
313
314 shiftCount = countLeadingZeros32( aSig ) - 8;
315 *zSigPtr = aSig<<shiftCount;
316 *zExpPtr = 1 - shiftCount;
317
318}
319
320/*----------------------------------------------------------------------------
321| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
322| single-precision floating-point value, returning the result. After being
323| shifted into the proper positions, the three fields are simply added
324| together to form the result. This means that any integer portion of `zSig'
325| will be added into the exponent. Since a properly normalized significand
326| will have an integer portion equal to 1, the `zExp' input should be 1 less
327| than the desired result exponent whenever `zSig' is a complete, normalized
328| significand.
329*----------------------------------------------------------------------------*/
330
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400331static inline float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
bellard158142c2005-03-13 16:54:06 +0000332{
333
pbrookf090c9d2007-11-18 14:33:24 +0000334 return make_float32(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100335 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000336
337}
338
339/*----------------------------------------------------------------------------
340| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
341| and significand `zSig', and returns the proper single-precision floating-
342| point value corresponding to the abstract input. Ordinarily, the abstract
343| value is simply rounded and packed into the single-precision format, with
344| the inexact exception raised if the abstract input cannot be represented
345| exactly. However, if the abstract value is too large, the overflow and
346| inexact exceptions are raised and an infinity or maximal finite value is
347| returned. If the abstract value is too small, the input value is rounded to
348| a subnormal number, and the underflow and inexact exceptions are raised if
349| the abstract input cannot be represented exactly as a subnormal single-
350| precision floating-point number.
351| The input significand `zSig' has its binary point between bits 30
352| and 29, which is 7 bits to the left of the usual location. This shifted
353| significand must be normalized or smaller. If `zSig' is not normalized,
354| `zExp' must be 0; in that case, the result returned is a subnormal number,
355| and it must not require rounding. In the usual case that `zSig' is
356| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
357| The handling of underflow and overflow follows the IEC/IEEE Standard for
358| Binary Floating-Point Arithmetic.
359*----------------------------------------------------------------------------*/
360
Andreas Färber94a49d82012-04-26 00:15:56 +0200361static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000362{
363 int8 roundingMode;
364 flag roundNearestEven;
365 int8 roundIncrement, roundBits;
366 flag isTiny;
367
368 roundingMode = STATUS(float_rounding_mode);
369 roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydelldc355b72014-01-07 17:19:12 +0000370 switch (roundingMode) {
371 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000372 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000373 roundIncrement = 0x40;
374 break;
375 case float_round_to_zero:
376 roundIncrement = 0;
377 break;
378 case float_round_up:
379 roundIncrement = zSign ? 0 : 0x7f;
380 break;
381 case float_round_down:
382 roundIncrement = zSign ? 0x7f : 0;
383 break;
384 default:
385 abort();
386 break;
bellard158142c2005-03-13 16:54:06 +0000387 }
388 roundBits = zSig & 0x7F;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100389 if ( 0xFD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000390 if ( ( 0xFD < zExp )
391 || ( ( zExp == 0xFD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100392 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000393 ) {
394 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000395 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000396 }
397 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100398 if (STATUS(flush_to_zero)) {
399 float_raise(float_flag_output_denormal STATUS_VAR);
400 return packFloat32(zSign, 0, 0);
401 }
bellard158142c2005-03-13 16:54:06 +0000402 isTiny =
403 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
404 || ( zExp < -1 )
405 || ( zSig + roundIncrement < 0x80000000 );
406 shift32RightJamming( zSig, - zExp, &zSig );
407 zExp = 0;
408 roundBits = zSig & 0x7F;
409 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
410 }
411 }
412 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
413 zSig = ( zSig + roundIncrement )>>7;
414 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
415 if ( zSig == 0 ) zExp = 0;
416 return packFloat32( zSign, zExp, zSig );
417
418}
419
420/*----------------------------------------------------------------------------
421| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
422| and significand `zSig', and returns the proper single-precision floating-
423| point value corresponding to the abstract input. This routine is just like
424| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
425| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
426| floating-point exponent.
427*----------------------------------------------------------------------------*/
428
429static float32
Andreas Färber94a49d82012-04-26 00:15:56 +0200430 normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000431{
432 int8 shiftCount;
433
434 shiftCount = countLeadingZeros32( zSig ) - 1;
435 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
436
437}
438
439/*----------------------------------------------------------------------------
440| Returns the fraction bits of the double-precision floating-point value `a'.
441*----------------------------------------------------------------------------*/
442
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400443static inline uint64_t extractFloat64Frac( float64 a )
bellard158142c2005-03-13 16:54:06 +0000444{
445
pbrookf090c9d2007-11-18 14:33:24 +0000446 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
bellard158142c2005-03-13 16:54:06 +0000447
448}
449
450/*----------------------------------------------------------------------------
451| Returns the exponent bits of the double-precision floating-point value `a'.
452*----------------------------------------------------------------------------*/
453
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400454static inline int_fast16_t extractFloat64Exp(float64 a)
bellard158142c2005-03-13 16:54:06 +0000455{
456
pbrookf090c9d2007-11-18 14:33:24 +0000457 return ( float64_val(a)>>52 ) & 0x7FF;
bellard158142c2005-03-13 16:54:06 +0000458
459}
460
461/*----------------------------------------------------------------------------
462| Returns the sign bit of the double-precision floating-point value `a'.
463*----------------------------------------------------------------------------*/
464
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400465static inline flag extractFloat64Sign( float64 a )
bellard158142c2005-03-13 16:54:06 +0000466{
467
pbrookf090c9d2007-11-18 14:33:24 +0000468 return float64_val(a)>>63;
bellard158142c2005-03-13 16:54:06 +0000469
470}
471
472/*----------------------------------------------------------------------------
Peter Maydell37d18662011-01-06 19:37:53 +0000473| If `a' is denormal and we are in flush-to-zero mode then set the
474| input-denormal exception and return zero. Otherwise just return the value.
475*----------------------------------------------------------------------------*/
Alex Bennée7baeabc2014-03-17 16:31:51 +0000476float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
Peter Maydell37d18662011-01-06 19:37:53 +0000477{
478 if (STATUS(flush_inputs_to_zero)) {
479 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
480 float_raise(float_flag_input_denormal STATUS_VAR);
481 return make_float64(float64_val(a) & (1ULL << 63));
482 }
483 }
484 return a;
485}
486
487/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +0000488| Normalizes the subnormal double-precision floating-point value represented
489| by the denormalized significand `aSig'. The normalized exponent and
490| significand are stored at the locations pointed to by `zExpPtr' and
491| `zSigPtr', respectively.
492*----------------------------------------------------------------------------*/
493
494static void
Andreas Färber94a49d82012-04-26 00:15:56 +0200495 normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t *zExpPtr, uint64_t *zSigPtr)
bellard158142c2005-03-13 16:54:06 +0000496{
497 int8 shiftCount;
498
499 shiftCount = countLeadingZeros64( aSig ) - 11;
500 *zSigPtr = aSig<<shiftCount;
501 *zExpPtr = 1 - shiftCount;
502
503}
504
505/*----------------------------------------------------------------------------
506| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
507| double-precision floating-point value, returning the result. After being
508| shifted into the proper positions, the three fields are simply added
509| together to form the result. This means that any integer portion of `zSig'
510| will be added into the exponent. Since a properly normalized significand
511| will have an integer portion equal to 1, the `zExp' input should be 1 less
512| than the desired result exponent whenever `zSig' is a complete, normalized
513| significand.
514*----------------------------------------------------------------------------*/
515
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400516static inline float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
bellard158142c2005-03-13 16:54:06 +0000517{
518
pbrookf090c9d2007-11-18 14:33:24 +0000519 return make_float64(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100520 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
bellard158142c2005-03-13 16:54:06 +0000521
522}
523
524/*----------------------------------------------------------------------------
525| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
526| and significand `zSig', and returns the proper double-precision floating-
527| point value corresponding to the abstract input. Ordinarily, the abstract
528| value is simply rounded and packed into the double-precision format, with
529| the inexact exception raised if the abstract input cannot be represented
530| exactly. However, if the abstract value is too large, the overflow and
531| inexact exceptions are raised and an infinity or maximal finite value is
532| returned. If the abstract value is too small, the input value is rounded
533| to a subnormal number, and the underflow and inexact exceptions are raised
534| if the abstract input cannot be represented exactly as a subnormal double-
535| precision floating-point number.
536| The input significand `zSig' has its binary point between bits 62
537| and 61, which is 10 bits to the left of the usual location. This shifted
538| significand must be normalized or smaller. If `zSig' is not normalized,
539| `zExp' must be 0; in that case, the result returned is a subnormal number,
540| and it must not require rounding. In the usual case that `zSig' is
541| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
542| The handling of underflow and overflow follows the IEC/IEEE Standard for
543| Binary Floating-Point Arithmetic.
544*----------------------------------------------------------------------------*/
545
Andreas Färber94a49d82012-04-26 00:15:56 +0200546static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000547{
548 int8 roundingMode;
549 flag roundNearestEven;
Andreas Färber94a49d82012-04-26 00:15:56 +0200550 int_fast16_t roundIncrement, roundBits;
bellard158142c2005-03-13 16:54:06 +0000551 flag isTiny;
552
553 roundingMode = STATUS(float_rounding_mode);
554 roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydelldc355b72014-01-07 17:19:12 +0000555 switch (roundingMode) {
556 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000557 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000558 roundIncrement = 0x200;
559 break;
560 case float_round_to_zero:
561 roundIncrement = 0;
562 break;
563 case float_round_up:
564 roundIncrement = zSign ? 0 : 0x3ff;
565 break;
566 case float_round_down:
567 roundIncrement = zSign ? 0x3ff : 0;
568 break;
569 default:
570 abort();
bellard158142c2005-03-13 16:54:06 +0000571 }
572 roundBits = zSig & 0x3FF;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100573 if ( 0x7FD <= (uint16_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +0000574 if ( ( 0x7FD < zExp )
575 || ( ( zExp == 0x7FD )
Andreas Färberbb98fe42011-03-07 01:34:06 +0100576 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
bellard158142c2005-03-13 16:54:06 +0000577 ) {
578 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
pbrookf090c9d2007-11-18 14:33:24 +0000579 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
bellard158142c2005-03-13 16:54:06 +0000580 }
581 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100582 if (STATUS(flush_to_zero)) {
583 float_raise(float_flag_output_denormal STATUS_VAR);
584 return packFloat64(zSign, 0, 0);
585 }
bellard158142c2005-03-13 16:54:06 +0000586 isTiny =
587 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
588 || ( zExp < -1 )
589 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
590 shift64RightJamming( zSig, - zExp, &zSig );
591 zExp = 0;
592 roundBits = zSig & 0x3FF;
593 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
594 }
595 }
596 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
597 zSig = ( zSig + roundIncrement )>>10;
598 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
599 if ( zSig == 0 ) zExp = 0;
600 return packFloat64( zSign, zExp, zSig );
601
602}
603
604/*----------------------------------------------------------------------------
605| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
606| and significand `zSig', and returns the proper double-precision floating-
607| point value corresponding to the abstract input. This routine is just like
608| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
609| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
610| floating-point exponent.
611*----------------------------------------------------------------------------*/
612
613static float64
Andreas Färber94a49d82012-04-26 00:15:56 +0200614 normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +0000615{
616 int8 shiftCount;
617
618 shiftCount = countLeadingZeros64( zSig ) - 1;
619 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
620
621}
622
bellard158142c2005-03-13 16:54:06 +0000623/*----------------------------------------------------------------------------
624| Returns the fraction bits of the extended double-precision floating-point
625| value `a'.
626*----------------------------------------------------------------------------*/
627
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400628static inline uint64_t extractFloatx80Frac( floatx80 a )
bellard158142c2005-03-13 16:54:06 +0000629{
630
631 return a.low;
632
633}
634
635/*----------------------------------------------------------------------------
636| Returns the exponent bits of the extended double-precision floating-point
637| value `a'.
638*----------------------------------------------------------------------------*/
639
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400640static inline int32 extractFloatx80Exp( floatx80 a )
bellard158142c2005-03-13 16:54:06 +0000641{
642
643 return a.high & 0x7FFF;
644
645}
646
647/*----------------------------------------------------------------------------
648| Returns the sign bit of the extended double-precision floating-point value
649| `a'.
650*----------------------------------------------------------------------------*/
651
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400652static inline flag extractFloatx80Sign( floatx80 a )
bellard158142c2005-03-13 16:54:06 +0000653{
654
655 return a.high>>15;
656
657}
658
659/*----------------------------------------------------------------------------
660| Normalizes the subnormal extended double-precision floating-point value
661| represented by the denormalized significand `aSig'. The normalized exponent
662| and significand are stored at the locations pointed to by `zExpPtr' and
663| `zSigPtr', respectively.
664*----------------------------------------------------------------------------*/
665
666static void
Andreas Färberbb98fe42011-03-07 01:34:06 +0100667 normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
bellard158142c2005-03-13 16:54:06 +0000668{
669 int8 shiftCount;
670
671 shiftCount = countLeadingZeros64( aSig );
672 *zSigPtr = aSig<<shiftCount;
673 *zExpPtr = 1 - shiftCount;
674
675}
676
677/*----------------------------------------------------------------------------
678| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
679| extended double-precision floating-point value, returning the result.
680*----------------------------------------------------------------------------*/
681
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400682static inline floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
bellard158142c2005-03-13 16:54:06 +0000683{
684 floatx80 z;
685
686 z.low = zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100687 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
bellard158142c2005-03-13 16:54:06 +0000688 return z;
689
690}
691
692/*----------------------------------------------------------------------------
693| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
694| and extended significand formed by the concatenation of `zSig0' and `zSig1',
695| and returns the proper extended double-precision floating-point value
696| corresponding to the abstract input. Ordinarily, the abstract value is
697| rounded and packed into the extended double-precision format, with the
698| inexact exception raised if the abstract input cannot be represented
699| exactly. However, if the abstract value is too large, the overflow and
700| inexact exceptions are raised and an infinity or maximal finite value is
701| returned. If the abstract value is too small, the input value is rounded to
702| a subnormal number, and the underflow and inexact exceptions are raised if
703| the abstract input cannot be represented exactly as a subnormal extended
704| double-precision floating-point number.
705| If `roundingPrecision' is 32 or 64, the result is rounded to the same
706| number of bits as single or double precision, respectively. Otherwise, the
707| result is rounded to the full precision of the extended double-precision
708| format.
709| The input significand must be normalized or smaller. If the input
710| significand is not normalized, `zExp' must be 0; in that case, the result
711| returned is a subnormal number, and it must not require rounding. The
712| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
713| Floating-Point Arithmetic.
714*----------------------------------------------------------------------------*/
715
716static floatx80
717 roundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100718 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000719 STATUS_PARAM)
720{
721 int8 roundingMode;
722 flag roundNearestEven, increment, isTiny;
723 int64 roundIncrement, roundMask, roundBits;
724
725 roundingMode = STATUS(float_rounding_mode);
726 roundNearestEven = ( roundingMode == float_round_nearest_even );
727 if ( roundingPrecision == 80 ) goto precision80;
728 if ( roundingPrecision == 64 ) {
729 roundIncrement = LIT64( 0x0000000000000400 );
730 roundMask = LIT64( 0x00000000000007FF );
731 }
732 else if ( roundingPrecision == 32 ) {
733 roundIncrement = LIT64( 0x0000008000000000 );
734 roundMask = LIT64( 0x000000FFFFFFFFFF );
735 }
736 else {
737 goto precision80;
738 }
739 zSig0 |= ( zSig1 != 0 );
Peter Maydelldc355b72014-01-07 17:19:12 +0000740 switch (roundingMode) {
741 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000742 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000743 break;
744 case float_round_to_zero:
745 roundIncrement = 0;
746 break;
747 case float_round_up:
748 roundIncrement = zSign ? 0 : roundMask;
749 break;
750 case float_round_down:
751 roundIncrement = zSign ? roundMask : 0;
752 break;
753 default:
754 abort();
bellard158142c2005-03-13 16:54:06 +0000755 }
756 roundBits = zSig0 & roundMask;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100757 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000758 if ( ( 0x7FFE < zExp )
759 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
760 ) {
761 goto overflow;
762 }
763 if ( zExp <= 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +0100764 if (STATUS(flush_to_zero)) {
765 float_raise(float_flag_output_denormal STATUS_VAR);
766 return packFloatx80(zSign, 0, 0);
767 }
bellard158142c2005-03-13 16:54:06 +0000768 isTiny =
769 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
770 || ( zExp < 0 )
771 || ( zSig0 <= zSig0 + roundIncrement );
772 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
773 zExp = 0;
774 roundBits = zSig0 & roundMask;
775 if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
776 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
777 zSig0 += roundIncrement;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100778 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000779 roundIncrement = roundMask + 1;
780 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
781 roundMask |= roundIncrement;
782 }
783 zSig0 &= ~ roundMask;
784 return packFloatx80( zSign, zExp, zSig0 );
785 }
786 }
787 if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
788 zSig0 += roundIncrement;
789 if ( zSig0 < roundIncrement ) {
790 ++zExp;
791 zSig0 = LIT64( 0x8000000000000000 );
792 }
793 roundIncrement = roundMask + 1;
794 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
795 roundMask |= roundIncrement;
796 }
797 zSig0 &= ~ roundMask;
798 if ( zSig0 == 0 ) zExp = 0;
799 return packFloatx80( zSign, zExp, zSig0 );
800 precision80:
Peter Maydelldc355b72014-01-07 17:19:12 +0000801 switch (roundingMode) {
802 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000803 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000804 increment = ((int64_t)zSig1 < 0);
805 break;
806 case float_round_to_zero:
807 increment = 0;
808 break;
809 case float_round_up:
810 increment = !zSign && zSig1;
811 break;
812 case float_round_down:
813 increment = zSign && zSig1;
814 break;
815 default:
816 abort();
bellard158142c2005-03-13 16:54:06 +0000817 }
Andreas Färberbb98fe42011-03-07 01:34:06 +0100818 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
bellard158142c2005-03-13 16:54:06 +0000819 if ( ( 0x7FFE < zExp )
820 || ( ( zExp == 0x7FFE )
821 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
822 && increment
823 )
824 ) {
825 roundMask = 0;
826 overflow:
827 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
828 if ( ( roundingMode == float_round_to_zero )
829 || ( zSign && ( roundingMode == float_round_up ) )
830 || ( ! zSign && ( roundingMode == float_round_down ) )
831 ) {
832 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
833 }
834 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
835 }
836 if ( zExp <= 0 ) {
837 isTiny =
838 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
839 || ( zExp < 0 )
840 || ! increment
841 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
842 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
843 zExp = 0;
844 if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
845 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
Peter Maydelldc355b72014-01-07 17:19:12 +0000846 switch (roundingMode) {
847 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +0000848 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +0000849 increment = ((int64_t)zSig1 < 0);
850 break;
851 case float_round_to_zero:
852 increment = 0;
853 break;
854 case float_round_up:
855 increment = !zSign && zSig1;
856 break;
857 case float_round_down:
858 increment = zSign && zSig1;
859 break;
860 default:
861 abort();
bellard158142c2005-03-13 16:54:06 +0000862 }
863 if ( increment ) {
864 ++zSig0;
865 zSig0 &=
Andreas Färberbb98fe42011-03-07 01:34:06 +0100866 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
867 if ( (int64_t) zSig0 < 0 ) zExp = 1;
bellard158142c2005-03-13 16:54:06 +0000868 }
869 return packFloatx80( zSign, zExp, zSig0 );
870 }
871 }
872 if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
873 if ( increment ) {
874 ++zSig0;
875 if ( zSig0 == 0 ) {
876 ++zExp;
877 zSig0 = LIT64( 0x8000000000000000 );
878 }
879 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +0100880 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
bellard158142c2005-03-13 16:54:06 +0000881 }
882 }
883 else {
884 if ( zSig0 == 0 ) zExp = 0;
885 }
886 return packFloatx80( zSign, zExp, zSig0 );
887
888}
889
890/*----------------------------------------------------------------------------
891| Takes an abstract floating-point value having sign `zSign', exponent
892| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
893| and returns the proper extended double-precision floating-point value
894| corresponding to the abstract input. This routine is just like
895| `roundAndPackFloatx80' except that the input significand does not have to be
896| normalized.
897*----------------------------------------------------------------------------*/
898
899static floatx80
900 normalizeRoundAndPackFloatx80(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100901 int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
bellard158142c2005-03-13 16:54:06 +0000902 STATUS_PARAM)
903{
904 int8 shiftCount;
905
906 if ( zSig0 == 0 ) {
907 zSig0 = zSig1;
908 zSig1 = 0;
909 zExp -= 64;
910 }
911 shiftCount = countLeadingZeros64( zSig0 );
912 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
913 zExp -= shiftCount;
914 return
915 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
916
917}
918
bellard158142c2005-03-13 16:54:06 +0000919/*----------------------------------------------------------------------------
920| Returns the least-significant 64 fraction bits of the quadruple-precision
921| floating-point value `a'.
922*----------------------------------------------------------------------------*/
923
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400924static inline uint64_t extractFloat128Frac1( float128 a )
bellard158142c2005-03-13 16:54:06 +0000925{
926
927 return a.low;
928
929}
930
931/*----------------------------------------------------------------------------
932| Returns the most-significant 48 fraction bits of the quadruple-precision
933| floating-point value `a'.
934*----------------------------------------------------------------------------*/
935
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400936static inline uint64_t extractFloat128Frac0( float128 a )
bellard158142c2005-03-13 16:54:06 +0000937{
938
939 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
940
941}
942
943/*----------------------------------------------------------------------------
944| Returns the exponent bits of the quadruple-precision floating-point value
945| `a'.
946*----------------------------------------------------------------------------*/
947
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400948static inline int32 extractFloat128Exp( float128 a )
bellard158142c2005-03-13 16:54:06 +0000949{
950
951 return ( a.high>>48 ) & 0x7FFF;
952
953}
954
955/*----------------------------------------------------------------------------
956| Returns the sign bit of the quadruple-precision floating-point value `a'.
957*----------------------------------------------------------------------------*/
958
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400959static inline flag extractFloat128Sign( float128 a )
bellard158142c2005-03-13 16:54:06 +0000960{
961
962 return a.high>>63;
963
964}
965
966/*----------------------------------------------------------------------------
967| Normalizes the subnormal quadruple-precision floating-point value
968| represented by the denormalized significand formed by the concatenation of
969| `aSig0' and `aSig1'. The normalized exponent is stored at the location
970| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
971| significand are stored at the location pointed to by `zSig0Ptr', and the
972| least significant 64 bits of the normalized significand are stored at the
973| location pointed to by `zSig1Ptr'.
974*----------------------------------------------------------------------------*/
975
976static void
977 normalizeFloat128Subnormal(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100978 uint64_t aSig0,
979 uint64_t aSig1,
bellard158142c2005-03-13 16:54:06 +0000980 int32 *zExpPtr,
Andreas Färberbb98fe42011-03-07 01:34:06 +0100981 uint64_t *zSig0Ptr,
982 uint64_t *zSig1Ptr
bellard158142c2005-03-13 16:54:06 +0000983 )
984{
985 int8 shiftCount;
986
987 if ( aSig0 == 0 ) {
988 shiftCount = countLeadingZeros64( aSig1 ) - 15;
989 if ( shiftCount < 0 ) {
990 *zSig0Ptr = aSig1>>( - shiftCount );
991 *zSig1Ptr = aSig1<<( shiftCount & 63 );
992 }
993 else {
994 *zSig0Ptr = aSig1<<shiftCount;
995 *zSig1Ptr = 0;
996 }
997 *zExpPtr = - shiftCount - 63;
998 }
999 else {
1000 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1001 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1002 *zExpPtr = 1 - shiftCount;
1003 }
1004
1005}
1006
1007/*----------------------------------------------------------------------------
1008| Packs the sign `zSign', the exponent `zExp', and the significand formed
1009| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
1010| floating-point value, returning the result. After being shifted into the
1011| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
1012| added together to form the most significant 32 bits of the result. This
1013| means that any integer portion of `zSig0' will be added into the exponent.
1014| Since a properly normalized significand will have an integer portion equal
1015| to 1, the `zExp' input should be 1 less than the desired result exponent
1016| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1017| significand.
1018*----------------------------------------------------------------------------*/
1019
Luiz Capitulinoa49db982014-06-19 10:13:43 -04001020static inline float128
Andreas Färberbb98fe42011-03-07 01:34:06 +01001021 packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
bellard158142c2005-03-13 16:54:06 +00001022{
1023 float128 z;
1024
1025 z.low = zSig1;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001026 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
bellard158142c2005-03-13 16:54:06 +00001027 return z;
1028
1029}
1030
1031/*----------------------------------------------------------------------------
1032| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1033| and extended significand formed by the concatenation of `zSig0', `zSig1',
1034| and `zSig2', and returns the proper quadruple-precision floating-point value
1035| corresponding to the abstract input. Ordinarily, the abstract value is
1036| simply rounded and packed into the quadruple-precision format, with the
1037| inexact exception raised if the abstract input cannot be represented
1038| exactly. However, if the abstract value is too large, the overflow and
1039| inexact exceptions are raised and an infinity or maximal finite value is
1040| returned. If the abstract value is too small, the input value is rounded to
1041| a subnormal number, and the underflow and inexact exceptions are raised if
1042| the abstract input cannot be represented exactly as a subnormal quadruple-
1043| precision floating-point number.
1044| The input significand must be normalized or smaller. If the input
1045| significand is not normalized, `zExp' must be 0; in that case, the result
1046| returned is a subnormal number, and it must not require rounding. In the
1047| usual case that the input significand is normalized, `zExp' must be 1 less
1048| than the ``true'' floating-point exponent. The handling of underflow and
1049| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1050*----------------------------------------------------------------------------*/
1051
1052static float128
1053 roundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01001054 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001055{
1056 int8 roundingMode;
1057 flag roundNearestEven, increment, isTiny;
1058
1059 roundingMode = STATUS(float_rounding_mode);
1060 roundNearestEven = ( roundingMode == float_round_nearest_even );
Peter Maydelldc355b72014-01-07 17:19:12 +00001061 switch (roundingMode) {
1062 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +00001063 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +00001064 increment = ((int64_t)zSig2 < 0);
1065 break;
1066 case float_round_to_zero:
1067 increment = 0;
1068 break;
1069 case float_round_up:
1070 increment = !zSign && zSig2;
1071 break;
1072 case float_round_down:
1073 increment = zSign && zSig2;
1074 break;
1075 default:
1076 abort();
bellard158142c2005-03-13 16:54:06 +00001077 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001078 if ( 0x7FFD <= (uint32_t) zExp ) {
bellard158142c2005-03-13 16:54:06 +00001079 if ( ( 0x7FFD < zExp )
1080 || ( ( zExp == 0x7FFD )
1081 && eq128(
1082 LIT64( 0x0001FFFFFFFFFFFF ),
1083 LIT64( 0xFFFFFFFFFFFFFFFF ),
1084 zSig0,
1085 zSig1
1086 )
1087 && increment
1088 )
1089 ) {
1090 float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
1091 if ( ( roundingMode == float_round_to_zero )
1092 || ( zSign && ( roundingMode == float_round_up ) )
1093 || ( ! zSign && ( roundingMode == float_round_down ) )
1094 ) {
1095 return
1096 packFloat128(
1097 zSign,
1098 0x7FFE,
1099 LIT64( 0x0000FFFFFFFFFFFF ),
1100 LIT64( 0xFFFFFFFFFFFFFFFF )
1101 );
1102 }
1103 return packFloat128( zSign, 0x7FFF, 0, 0 );
1104 }
1105 if ( zExp < 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01001106 if (STATUS(flush_to_zero)) {
1107 float_raise(float_flag_output_denormal STATUS_VAR);
1108 return packFloat128(zSign, 0, 0, 0);
1109 }
bellard158142c2005-03-13 16:54:06 +00001110 isTiny =
1111 ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
1112 || ( zExp < -1 )
1113 || ! increment
1114 || lt128(
1115 zSig0,
1116 zSig1,
1117 LIT64( 0x0001FFFFFFFFFFFF ),
1118 LIT64( 0xFFFFFFFFFFFFFFFF )
1119 );
1120 shift128ExtraRightJamming(
1121 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1122 zExp = 0;
1123 if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
Peter Maydelldc355b72014-01-07 17:19:12 +00001124 switch (roundingMode) {
1125 case float_round_nearest_even:
Peter Maydellf9288a72014-01-07 17:19:12 +00001126 case float_round_ties_away:
Peter Maydelldc355b72014-01-07 17:19:12 +00001127 increment = ((int64_t)zSig2 < 0);
1128 break;
1129 case float_round_to_zero:
1130 increment = 0;
1131 break;
1132 case float_round_up:
1133 increment = !zSign && zSig2;
1134 break;
1135 case float_round_down:
1136 increment = zSign && zSig2;
1137 break;
1138 default:
1139 abort();
bellard158142c2005-03-13 16:54:06 +00001140 }
1141 }
1142 }
1143 if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
1144 if ( increment ) {
1145 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1146 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1147 }
1148 else {
1149 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1150 }
1151 return packFloat128( zSign, zExp, zSig0, zSig1 );
1152
1153}
1154
1155/*----------------------------------------------------------------------------
1156| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1157| and significand formed by the concatenation of `zSig0' and `zSig1', and
1158| returns the proper quadruple-precision floating-point value corresponding
1159| to the abstract input. This routine is just like `roundAndPackFloat128'
1160| except that the input significand has fewer bits and does not have to be
1161| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1162| point exponent.
1163*----------------------------------------------------------------------------*/
1164
1165static float128
1166 normalizeRoundAndPackFloat128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01001167 flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001168{
1169 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001170 uint64_t zSig2;
bellard158142c2005-03-13 16:54:06 +00001171
1172 if ( zSig0 == 0 ) {
1173 zSig0 = zSig1;
1174 zSig1 = 0;
1175 zExp -= 64;
1176 }
1177 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1178 if ( 0 <= shiftCount ) {
1179 zSig2 = 0;
1180 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1181 }
1182 else {
1183 shift128ExtraRightJamming(
1184 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1185 }
1186 zExp -= shiftCount;
1187 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
1188
1189}
1190
bellard158142c2005-03-13 16:54:06 +00001191/*----------------------------------------------------------------------------
1192| Returns the result of converting the 32-bit two's complement integer `a'
1193| to the single-precision floating-point format. The conversion is performed
1194| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1195*----------------------------------------------------------------------------*/
1196
Peter Maydellc4850f92014-01-07 17:17:49 +00001197float32 int32_to_float32(int32_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001198{
1199 flag zSign;
1200
pbrookf090c9d2007-11-18 14:33:24 +00001201 if ( a == 0 ) return float32_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001202 if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
bellard158142c2005-03-13 16:54:06 +00001203 zSign = ( a < 0 );
1204 return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
1205
1206}
1207
1208/*----------------------------------------------------------------------------
1209| Returns the result of converting the 32-bit two's complement integer `a'
1210| to the double-precision floating-point format. The conversion is performed
1211| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1212*----------------------------------------------------------------------------*/
1213
Peter Maydellc4850f92014-01-07 17:17:49 +00001214float64 int32_to_float64(int32_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001215{
1216 flag zSign;
1217 uint32 absA;
1218 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001219 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001220
pbrookf090c9d2007-11-18 14:33:24 +00001221 if ( a == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00001222 zSign = ( a < 0 );
1223 absA = zSign ? - a : a;
1224 shiftCount = countLeadingZeros32( absA ) + 21;
1225 zSig = absA;
1226 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1227
1228}
1229
bellard158142c2005-03-13 16:54:06 +00001230/*----------------------------------------------------------------------------
1231| Returns the result of converting the 32-bit two's complement integer `a'
1232| to the extended double-precision floating-point format. The conversion
1233| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1234| Arithmetic.
1235*----------------------------------------------------------------------------*/
1236
Peter Maydellc4850f92014-01-07 17:17:49 +00001237floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001238{
1239 flag zSign;
1240 uint32 absA;
1241 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001242 uint64_t zSig;
bellard158142c2005-03-13 16:54:06 +00001243
1244 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1245 zSign = ( a < 0 );
1246 absA = zSign ? - a : a;
1247 shiftCount = countLeadingZeros32( absA ) + 32;
1248 zSig = absA;
1249 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1250
1251}
1252
bellard158142c2005-03-13 16:54:06 +00001253/*----------------------------------------------------------------------------
1254| Returns the result of converting the 32-bit two's complement integer `a' to
1255| the quadruple-precision floating-point format. The conversion is performed
1256| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1257*----------------------------------------------------------------------------*/
1258
Peter Maydellc4850f92014-01-07 17:17:49 +00001259float128 int32_to_float128(int32_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001260{
1261 flag zSign;
1262 uint32 absA;
1263 int8 shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001264 uint64_t zSig0;
bellard158142c2005-03-13 16:54:06 +00001265
1266 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1267 zSign = ( a < 0 );
1268 absA = zSign ? - a : a;
1269 shiftCount = countLeadingZeros32( absA ) + 17;
1270 zSig0 = absA;
1271 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1272
1273}
1274
bellard158142c2005-03-13 16:54:06 +00001275/*----------------------------------------------------------------------------
1276| Returns the result of converting the 64-bit two's complement integer `a'
1277| to the single-precision floating-point format. The conversion is performed
1278| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1279*----------------------------------------------------------------------------*/
1280
Peter Maydellc4850f92014-01-07 17:17:49 +00001281float32 int64_to_float32(int64_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001282{
1283 flag zSign;
1284 uint64 absA;
1285 int8 shiftCount;
1286
pbrookf090c9d2007-11-18 14:33:24 +00001287 if ( a == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00001288 zSign = ( a < 0 );
1289 absA = zSign ? - a : a;
1290 shiftCount = countLeadingZeros64( absA ) - 40;
1291 if ( 0 <= shiftCount ) {
1292 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1293 }
1294 else {
1295 shiftCount += 7;
1296 if ( shiftCount < 0 ) {
1297 shift64RightJamming( absA, - shiftCount, &absA );
1298 }
1299 else {
1300 absA <<= shiftCount;
1301 }
1302 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
1303 }
1304
1305}
1306
Peter Maydellc4850f92014-01-07 17:17:49 +00001307float32 uint64_to_float32(uint64_t a STATUS_PARAM)
j_mayer75d62a52007-03-20 22:10:42 +00001308{
1309 int8 shiftCount;
1310
pbrookf090c9d2007-11-18 14:33:24 +00001311 if ( a == 0 ) return float32_zero;
j_mayer75d62a52007-03-20 22:10:42 +00001312 shiftCount = countLeadingZeros64( a ) - 40;
1313 if ( 0 <= shiftCount ) {
Peter Maydelle744c062012-09-28 16:17:03 +01001314 return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
j_mayer75d62a52007-03-20 22:10:42 +00001315 }
1316 else {
1317 shiftCount += 7;
1318 if ( shiftCount < 0 ) {
1319 shift64RightJamming( a, - shiftCount, &a );
1320 }
1321 else {
1322 a <<= shiftCount;
1323 }
Peter Maydelle744c062012-09-28 16:17:03 +01001324 return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00001325 }
1326}
1327
bellard158142c2005-03-13 16:54:06 +00001328/*----------------------------------------------------------------------------
1329| Returns the result of converting the 64-bit two's complement integer `a'
1330| to the double-precision floating-point format. The conversion is performed
1331| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1332*----------------------------------------------------------------------------*/
1333
Peter Maydellc4850f92014-01-07 17:17:49 +00001334float64 int64_to_float64(int64_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001335{
1336 flag zSign;
1337
pbrookf090c9d2007-11-18 14:33:24 +00001338 if ( a == 0 ) return float64_zero;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001339 if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00001340 return packFloat64( 1, 0x43E, 0 );
1341 }
1342 zSign = ( a < 0 );
1343 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
1344
1345}
1346
Peter Maydellc4850f92014-01-07 17:17:49 +00001347float64 uint64_to_float64(uint64_t a STATUS_PARAM)
j_mayer75d62a52007-03-20 22:10:42 +00001348{
Richard Henderson17ed2292012-12-31 10:09:03 -08001349 int exp = 0x43C;
j_mayer75d62a52007-03-20 22:10:42 +00001350
Richard Henderson17ed2292012-12-31 10:09:03 -08001351 if (a == 0) {
1352 return float64_zero;
1353 }
1354 if ((int64_t)a < 0) {
1355 shift64RightJamming(a, 1, &a);
1356 exp += 1;
1357 }
1358 return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00001359}
1360
bellard158142c2005-03-13 16:54:06 +00001361/*----------------------------------------------------------------------------
1362| Returns the result of converting the 64-bit two's complement integer `a'
1363| to the extended double-precision floating-point format. The conversion
1364| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1365| Arithmetic.
1366*----------------------------------------------------------------------------*/
1367
Peter Maydellc4850f92014-01-07 17:17:49 +00001368floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001369{
1370 flag zSign;
1371 uint64 absA;
1372 int8 shiftCount;
1373
1374 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1375 zSign = ( a < 0 );
1376 absA = zSign ? - a : a;
1377 shiftCount = countLeadingZeros64( absA );
1378 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1379
1380}
1381
bellard158142c2005-03-13 16:54:06 +00001382/*----------------------------------------------------------------------------
1383| Returns the result of converting the 64-bit two's complement integer `a' to
1384| the quadruple-precision floating-point format. The conversion is performed
1385| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1386*----------------------------------------------------------------------------*/
1387
Peter Maydellc4850f92014-01-07 17:17:49 +00001388float128 int64_to_float128(int64_t a STATUS_PARAM)
bellard158142c2005-03-13 16:54:06 +00001389{
1390 flag zSign;
1391 uint64 absA;
1392 int8 shiftCount;
1393 int32 zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001394 uint64_t zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00001395
1396 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1397 zSign = ( a < 0 );
1398 absA = zSign ? - a : a;
1399 shiftCount = countLeadingZeros64( absA ) + 49;
1400 zExp = 0x406E - shiftCount;
1401 if ( 64 <= shiftCount ) {
1402 zSig1 = 0;
1403 zSig0 = absA;
1404 shiftCount -= 64;
1405 }
1406 else {
1407 zSig1 = absA;
1408 zSig0 = 0;
1409 }
1410 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1411 return packFloat128( zSign, zExp, zSig0, zSig1 );
1412
1413}
1414
Peter Maydellc4850f92014-01-07 17:17:49 +00001415float128 uint64_to_float128(uint64_t a STATUS_PARAM)
Richard Henderson1e397ea2012-12-31 10:09:04 -08001416{
1417 if (a == 0) {
1418 return float128_zero;
1419 }
1420 return normalizeRoundAndPackFloat128(0, 0x406E, a, 0 STATUS_VAR);
1421}
1422
bellard158142c2005-03-13 16:54:06 +00001423/*----------------------------------------------------------------------------
1424| Returns the result of converting the single-precision floating-point value
1425| `a' to the 32-bit two's complement integer format. The conversion is
1426| performed according to the IEC/IEEE Standard for Binary Floating-Point
1427| Arithmetic---which means in particular that the conversion is rounded
1428| according to the current rounding mode. If `a' is a NaN, the largest
1429| positive integer is returned. Otherwise, if the conversion overflows, the
1430| largest integer with the same sign as `a' is returned.
1431*----------------------------------------------------------------------------*/
1432
1433int32 float32_to_int32( float32 a STATUS_PARAM )
1434{
1435 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001436 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001437 uint32_t aSig;
1438 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001439
Peter Maydell37d18662011-01-06 19:37:53 +00001440 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001441 aSig = extractFloat32Frac( a );
1442 aExp = extractFloat32Exp( a );
1443 aSign = extractFloat32Sign( a );
1444 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1445 if ( aExp ) aSig |= 0x00800000;
1446 shiftCount = 0xAF - aExp;
1447 aSig64 = aSig;
1448 aSig64 <<= 32;
1449 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1450 return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
1451
1452}
1453
1454/*----------------------------------------------------------------------------
1455| Returns the result of converting the single-precision floating-point value
1456| `a' to the 32-bit two's complement integer format. The conversion is
1457| performed according to the IEC/IEEE Standard for Binary Floating-Point
1458| Arithmetic, except that the conversion is always rounded toward zero.
1459| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1460| the conversion overflows, the largest integer with the same sign as `a' is
1461| returned.
1462*----------------------------------------------------------------------------*/
1463
1464int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
1465{
1466 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001467 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001468 uint32_t aSig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01001469 int32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00001470 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001471
1472 aSig = extractFloat32Frac( a );
1473 aExp = extractFloat32Exp( a );
1474 aSign = extractFloat32Sign( a );
1475 shiftCount = aExp - 0x9E;
1476 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001477 if ( float32_val(a) != 0xCF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001478 float_raise( float_flag_invalid STATUS_VAR);
1479 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1480 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001481 return (int32_t) 0x80000000;
bellard158142c2005-03-13 16:54:06 +00001482 }
1483 else if ( aExp <= 0x7E ) {
1484 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1485 return 0;
1486 }
1487 aSig = ( aSig | 0x00800000 )<<8;
1488 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001489 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001490 STATUS(float_exception_flags) |= float_flag_inexact;
1491 }
1492 if ( aSign ) z = - z;
1493 return z;
1494
1495}
1496
1497/*----------------------------------------------------------------------------
1498| Returns the result of converting the single-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00001499| `a' to the 16-bit two's complement integer format. The conversion is
1500| performed according to the IEC/IEEE Standard for Binary Floating-Point
1501| Arithmetic, except that the conversion is always rounded toward zero.
1502| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1503| the conversion overflows, the largest integer with the same sign as `a' is
1504| returned.
1505*----------------------------------------------------------------------------*/
1506
Andreas Färber94a49d82012-04-26 00:15:56 +02001507int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00001508{
1509 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001510 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001511 uint32_t aSig;
Peter Maydellcbcef452010-12-07 15:37:34 +00001512 int32 z;
1513
1514 aSig = extractFloat32Frac( a );
1515 aExp = extractFloat32Exp( a );
1516 aSign = extractFloat32Sign( a );
1517 shiftCount = aExp - 0x8E;
1518 if ( 0 <= shiftCount ) {
1519 if ( float32_val(a) != 0xC7000000 ) {
1520 float_raise( float_flag_invalid STATUS_VAR);
1521 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1522 return 0x7FFF;
1523 }
1524 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001525 return (int32_t) 0xffff8000;
Peter Maydellcbcef452010-12-07 15:37:34 +00001526 }
1527 else if ( aExp <= 0x7E ) {
1528 if ( aExp | aSig ) {
1529 STATUS(float_exception_flags) |= float_flag_inexact;
1530 }
1531 return 0;
1532 }
1533 shiftCount -= 0x10;
1534 aSig = ( aSig | 0x00800000 )<<8;
1535 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001536 if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
Peter Maydellcbcef452010-12-07 15:37:34 +00001537 STATUS(float_exception_flags) |= float_flag_inexact;
1538 }
1539 if ( aSign ) {
1540 z = - z;
1541 }
1542 return z;
1543
1544}
1545
1546/*----------------------------------------------------------------------------
1547| Returns the result of converting the single-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00001548| `a' to the 64-bit two's complement integer format. The conversion is
1549| performed according to the IEC/IEEE Standard for Binary Floating-Point
1550| Arithmetic---which means in particular that the conversion is rounded
1551| according to the current rounding mode. If `a' is a NaN, the largest
1552| positive integer is returned. Otherwise, if the conversion overflows, the
1553| largest integer with the same sign as `a' is returned.
1554*----------------------------------------------------------------------------*/
1555
1556int64 float32_to_int64( float32 a STATUS_PARAM )
1557{
1558 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001559 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001560 uint32_t aSig;
1561 uint64_t aSig64, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00001562 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001563
1564 aSig = extractFloat32Frac( a );
1565 aExp = extractFloat32Exp( a );
1566 aSign = extractFloat32Sign( a );
1567 shiftCount = 0xBE - aExp;
1568 if ( shiftCount < 0 ) {
1569 float_raise( float_flag_invalid STATUS_VAR);
1570 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1571 return LIT64( 0x7FFFFFFFFFFFFFFF );
1572 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001573 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001574 }
1575 if ( aExp ) aSig |= 0x00800000;
1576 aSig64 = aSig;
1577 aSig64 <<= 40;
1578 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1579 return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
1580
1581}
1582
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit unsigned integer format.  The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
| according to the current rounding mode.  If `a' is a NaN, the largest
| unsigned integer is returned.  Otherwise, if the conversion overflows, the
| largest unsigned integer is returned.  If `a' is negative, the result
| is rounded and zero is returned; values that do not round to zero will
| raise the inexact exception flag.
*----------------------------------------------------------------------------*/
1594
1595uint64 float32_to_uint64(float32 a STATUS_PARAM)
1596{
1597 flag aSign;
1598 int_fast16_t aExp, shiftCount;
1599 uint32_t aSig;
1600 uint64_t aSig64, aSigExtra;
1601 a = float32_squash_input_denormal(a STATUS_VAR);
1602
1603 aSig = extractFloat32Frac(a);
1604 aExp = extractFloat32Exp(a);
1605 aSign = extractFloat32Sign(a);
1606 if ((aSign) && (aExp > 126)) {
1607 float_raise(float_flag_invalid STATUS_VAR);
1608 if (float32_is_any_nan(a)) {
1609 return LIT64(0xFFFFFFFFFFFFFFFF);
1610 } else {
1611 return 0;
1612 }
1613 }
1614 shiftCount = 0xBE - aExp;
1615 if (aExp) {
1616 aSig |= 0x00800000;
1617 }
1618 if (shiftCount < 0) {
1619 float_raise(float_flag_invalid STATUS_VAR);
1620 return LIT64(0xFFFFFFFFFFFFFFFF);
1621 }
1622
1623 aSig64 = aSig;
1624 aSig64 <<= 40;
1625 shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
1626 return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
1627}
1628
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit unsigned integer format.  The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic, except that the conversion is always rounded toward zero.  If
| `a' is a NaN, the largest unsigned integer is returned.  Otherwise, if the
| conversion overflows, the largest unsigned integer is returned.  If
| `a' is negative, the result is rounded and zero is returned; values that do
| not round to zero will raise the inexact flag.
*----------------------------------------------------------------------------*/
1639
1640uint64 float32_to_uint64_round_to_zero(float32 a STATUS_PARAM)
1641{
1642 signed char current_rounding_mode = STATUS(float_rounding_mode);
1643 set_float_rounding_mode(float_round_to_zero STATUS_VAR);
1644 int64_t v = float32_to_uint64(a STATUS_VAR);
1645 set_float_rounding_mode(current_rounding_mode STATUS_VAR);
1646 return v;
1647}
1648
1649/*----------------------------------------------------------------------------
1650| Returns the result of converting the single-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00001651| `a' to the 64-bit two's complement integer format. The conversion is
1652| performed according to the IEC/IEEE Standard for Binary Floating-Point
1653| Arithmetic, except that the conversion is always rounded toward zero. If
1654| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
1655| conversion overflows, the largest integer with the same sign as `a' is
1656| returned.
1657*----------------------------------------------------------------------------*/
1658
1659int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
1660{
1661 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001662 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001663 uint32_t aSig;
1664 uint64_t aSig64;
bellard158142c2005-03-13 16:54:06 +00001665 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00001666 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001667
1668 aSig = extractFloat32Frac( a );
1669 aExp = extractFloat32Exp( a );
1670 aSign = extractFloat32Sign( a );
1671 shiftCount = aExp - 0xBE;
1672 if ( 0 <= shiftCount ) {
pbrookf090c9d2007-11-18 14:33:24 +00001673 if ( float32_val(a) != 0xDF000000 ) {
bellard158142c2005-03-13 16:54:06 +00001674 float_raise( float_flag_invalid STATUS_VAR);
1675 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1676 return LIT64( 0x7FFFFFFFFFFFFFFF );
1677 }
1678 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001679 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00001680 }
1681 else if ( aExp <= 0x7E ) {
1682 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
1683 return 0;
1684 }
1685 aSig64 = aSig | 0x00800000;
1686 aSig64 <<= 40;
1687 z = aSig64>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01001688 if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00001689 STATUS(float_exception_flags) |= float_flag_inexact;
1690 }
1691 if ( aSign ) z = - z;
1692 return z;
1693
1694}
1695
1696/*----------------------------------------------------------------------------
1697| Returns the result of converting the single-precision floating-point value
1698| `a' to the double-precision floating-point format. The conversion is
1699| performed according to the IEC/IEEE Standard for Binary Floating-Point
1700| Arithmetic.
1701*----------------------------------------------------------------------------*/
1702
1703float64 float32_to_float64( float32 a STATUS_PARAM )
1704{
1705 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001706 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001707 uint32_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00001708 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001709
1710 aSig = extractFloat32Frac( a );
1711 aExp = extractFloat32Exp( a );
1712 aSign = extractFloat32Sign( a );
1713 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001714 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001715 return packFloat64( aSign, 0x7FF, 0 );
1716 }
1717 if ( aExp == 0 ) {
1718 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1719 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1720 --aExp;
1721 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001722 return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
bellard158142c2005-03-13 16:54:06 +00001723
1724}
1725
bellard158142c2005-03-13 16:54:06 +00001726/*----------------------------------------------------------------------------
1727| Returns the result of converting the single-precision floating-point value
1728| `a' to the extended double-precision floating-point format. The conversion
1729| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1730| Arithmetic.
1731*----------------------------------------------------------------------------*/
1732
1733floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
1734{
1735 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001736 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001737 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001738
Peter Maydell37d18662011-01-06 19:37:53 +00001739 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001740 aSig = extractFloat32Frac( a );
1741 aExp = extractFloat32Exp( a );
1742 aSign = extractFloat32Sign( a );
1743 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001744 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001745 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1746 }
1747 if ( aExp == 0 ) {
1748 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1749 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1750 }
1751 aSig |= 0x00800000;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001752 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
bellard158142c2005-03-13 16:54:06 +00001753
1754}
1755
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the quadruple-precision floating-point format.  The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/
1762
1763float128 float32_to_float128( float32 a STATUS_PARAM )
1764{
1765 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001766 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001767 uint32_t aSig;
bellard158142c2005-03-13 16:54:06 +00001768
Peter Maydell37d18662011-01-06 19:37:53 +00001769 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001770 aSig = extractFloat32Frac( a );
1771 aExp = extractFloat32Exp( a );
1772 aSign = extractFloat32Sign( a );
1773 if ( aExp == 0xFF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00001774 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00001775 return packFloat128( aSign, 0x7FFF, 0, 0 );
1776 }
1777 if ( aExp == 0 ) {
1778 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1779 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1780 --aExp;
1781 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01001782 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
bellard158142c2005-03-13 16:54:06 +00001783
1784}
1785
bellard158142c2005-03-13 16:54:06 +00001786/*----------------------------------------------------------------------------
1787| Rounds the single-precision floating-point value `a' to an integer, and
1788| returns the result as a single-precision floating-point value. The
1789| operation is performed according to the IEC/IEEE Standard for Binary
1790| Floating-Point Arithmetic.
1791*----------------------------------------------------------------------------*/
1792
1793float32 float32_round_to_int( float32 a STATUS_PARAM)
1794{
1795 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02001796 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001797 uint32_t lastBitMask, roundBitsMask;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001798 uint32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00001799 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00001800
1801 aExp = extractFloat32Exp( a );
1802 if ( 0x96 <= aExp ) {
1803 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1804 return propagateFloat32NaN( a, a STATUS_VAR );
1805 }
1806 return a;
1807 }
1808 if ( aExp <= 0x7E ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01001809 if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00001810 STATUS(float_exception_flags) |= float_flag_inexact;
1811 aSign = extractFloat32Sign( a );
1812 switch ( STATUS(float_rounding_mode) ) {
1813 case float_round_nearest_even:
1814 if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1815 return packFloat32( aSign, 0x7F, 0 );
1816 }
1817 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00001818 case float_round_ties_away:
1819 if (aExp == 0x7E) {
1820 return packFloat32(aSign, 0x7F, 0);
1821 }
1822 break;
bellard158142c2005-03-13 16:54:06 +00001823 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00001824 return make_float32(aSign ? 0xBF800000 : 0);
bellard158142c2005-03-13 16:54:06 +00001825 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00001826 return make_float32(aSign ? 0x80000000 : 0x3F800000);
bellard158142c2005-03-13 16:54:06 +00001827 }
1828 return packFloat32( aSign, 0, 0 );
1829 }
1830 lastBitMask = 1;
1831 lastBitMask <<= 0x96 - aExp;
1832 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00001833 z = float32_val(a);
Peter Maydelldc355b72014-01-07 17:19:12 +00001834 switch (STATUS(float_rounding_mode)) {
1835 case float_round_nearest_even:
bellard158142c2005-03-13 16:54:06 +00001836 z += lastBitMask>>1;
Peter Maydelldc355b72014-01-07 17:19:12 +00001837 if ((z & roundBitsMask) == 0) {
1838 z &= ~lastBitMask;
1839 }
1840 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00001841 case float_round_ties_away:
1842 z += lastBitMask >> 1;
1843 break;
Peter Maydelldc355b72014-01-07 17:19:12 +00001844 case float_round_to_zero:
1845 break;
1846 case float_round_up:
1847 if (!extractFloat32Sign(make_float32(z))) {
bellard158142c2005-03-13 16:54:06 +00001848 z += roundBitsMask;
1849 }
Peter Maydelldc355b72014-01-07 17:19:12 +00001850 break;
1851 case float_round_down:
1852 if (extractFloat32Sign(make_float32(z))) {
1853 z += roundBitsMask;
1854 }
1855 break;
1856 default:
1857 abort();
bellard158142c2005-03-13 16:54:06 +00001858 }
1859 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00001860 if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
1861 return make_float32(z);
bellard158142c2005-03-13 16:54:06 +00001862
1863}
1864
1865/*----------------------------------------------------------------------------
1866| Returns the result of adding the absolute values of the single-precision
1867| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
1868| before being returned. `zSign' is ignored if the result is a NaN.
1869| The addition is performed according to the IEC/IEEE Standard for Binary
1870| Floating-Point Arithmetic.
1871*----------------------------------------------------------------------------*/
1872
1873static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1874{
Andreas Färber94a49d82012-04-26 00:15:56 +02001875 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001876 uint32_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02001877 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00001878
1879 aSig = extractFloat32Frac( a );
1880 aExp = extractFloat32Exp( a );
1881 bSig = extractFloat32Frac( b );
1882 bExp = extractFloat32Exp( b );
1883 expDiff = aExp - bExp;
1884 aSig <<= 6;
1885 bSig <<= 6;
1886 if ( 0 < expDiff ) {
1887 if ( aExp == 0xFF ) {
1888 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1889 return a;
1890 }
1891 if ( bExp == 0 ) {
1892 --expDiff;
1893 }
1894 else {
1895 bSig |= 0x20000000;
1896 }
1897 shift32RightJamming( bSig, expDiff, &bSig );
1898 zExp = aExp;
1899 }
1900 else if ( expDiff < 0 ) {
1901 if ( bExp == 0xFF ) {
1902 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1903 return packFloat32( zSign, 0xFF, 0 );
1904 }
1905 if ( aExp == 0 ) {
1906 ++expDiff;
1907 }
1908 else {
1909 aSig |= 0x20000000;
1910 }
1911 shift32RightJamming( aSig, - expDiff, &aSig );
1912 zExp = bExp;
1913 }
1914 else {
1915 if ( aExp == 0xFF ) {
1916 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1917 return a;
1918 }
pbrookfe76d972008-12-19 14:33:59 +00001919 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01001920 if (STATUS(flush_to_zero)) {
1921 if (aSig | bSig) {
1922 float_raise(float_flag_output_denormal STATUS_VAR);
1923 }
1924 return packFloat32(zSign, 0, 0);
1925 }
pbrookfe76d972008-12-19 14:33:59 +00001926 return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1927 }
bellard158142c2005-03-13 16:54:06 +00001928 zSig = 0x40000000 + aSig + bSig;
1929 zExp = aExp;
1930 goto roundAndPack;
1931 }
1932 aSig |= 0x20000000;
1933 zSig = ( aSig + bSig )<<1;
1934 --zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001935 if ( (int32_t) zSig < 0 ) {
bellard158142c2005-03-13 16:54:06 +00001936 zSig = aSig + bSig;
1937 ++zExp;
1938 }
1939 roundAndPack:
1940 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
1941
1942}
1943
1944/*----------------------------------------------------------------------------
1945| Returns the result of subtracting the absolute values of the single-
1946| precision floating-point values `a' and `b'. If `zSign' is 1, the
1947| difference is negated before being returned. `zSign' is ignored if the
1948| result is a NaN. The subtraction is performed according to the IEC/IEEE
1949| Standard for Binary Floating-Point Arithmetic.
1950*----------------------------------------------------------------------------*/
1951
1952static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
1953{
Andreas Färber94a49d82012-04-26 00:15:56 +02001954 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01001955 uint32_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02001956 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00001957
1958 aSig = extractFloat32Frac( a );
1959 aExp = extractFloat32Exp( a );
1960 bSig = extractFloat32Frac( b );
1961 bExp = extractFloat32Exp( b );
1962 expDiff = aExp - bExp;
1963 aSig <<= 7;
1964 bSig <<= 7;
1965 if ( 0 < expDiff ) goto aExpBigger;
1966 if ( expDiff < 0 ) goto bExpBigger;
1967 if ( aExp == 0xFF ) {
1968 if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1969 float_raise( float_flag_invalid STATUS_VAR);
1970 return float32_default_nan;
1971 }
1972 if ( aExp == 0 ) {
1973 aExp = 1;
1974 bExp = 1;
1975 }
1976 if ( bSig < aSig ) goto aBigger;
1977 if ( aSig < bSig ) goto bBigger;
1978 return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
1979 bExpBigger:
1980 if ( bExp == 0xFF ) {
1981 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
1982 return packFloat32( zSign ^ 1, 0xFF, 0 );
1983 }
1984 if ( aExp == 0 ) {
1985 ++expDiff;
1986 }
1987 else {
1988 aSig |= 0x40000000;
1989 }
1990 shift32RightJamming( aSig, - expDiff, &aSig );
1991 bSig |= 0x40000000;
1992 bBigger:
1993 zSig = bSig - aSig;
1994 zExp = bExp;
1995 zSign ^= 1;
1996 goto normalizeRoundAndPack;
1997 aExpBigger:
1998 if ( aExp == 0xFF ) {
1999 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2000 return a;
2001 }
2002 if ( bExp == 0 ) {
2003 --expDiff;
2004 }
2005 else {
2006 bSig |= 0x40000000;
2007 }
2008 shift32RightJamming( bSig, expDiff, &bSig );
2009 aSig |= 0x40000000;
2010 aBigger:
2011 zSig = aSig - bSig;
2012 zExp = aExp;
2013 normalizeRoundAndPack:
2014 --zExp;
2015 return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2016
2017}
2018
2019/*----------------------------------------------------------------------------
2020| Returns the result of adding the single-precision floating-point values `a'
2021| and `b'. The operation is performed according to the IEC/IEEE Standard for
2022| Binary Floating-Point Arithmetic.
2023*----------------------------------------------------------------------------*/
2024
2025float32 float32_add( float32 a, float32 b STATUS_PARAM )
2026{
2027 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00002028 a = float32_squash_input_denormal(a STATUS_VAR);
2029 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002030
2031 aSign = extractFloat32Sign( a );
2032 bSign = extractFloat32Sign( b );
2033 if ( aSign == bSign ) {
2034 return addFloat32Sigs( a, b, aSign STATUS_VAR);
2035 }
2036 else {
2037 return subFloat32Sigs( a, b, aSign STATUS_VAR );
2038 }
2039
2040}
2041
2042/*----------------------------------------------------------------------------
2043| Returns the result of subtracting the single-precision floating-point values
2044| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2045| for Binary Floating-Point Arithmetic.
2046*----------------------------------------------------------------------------*/
2047
2048float32 float32_sub( float32 a, float32 b STATUS_PARAM )
2049{
2050 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00002051 a = float32_squash_input_denormal(a STATUS_VAR);
2052 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002053
2054 aSign = extractFloat32Sign( a );
2055 bSign = extractFloat32Sign( b );
2056 if ( aSign == bSign ) {
2057 return subFloat32Sigs( a, b, aSign STATUS_VAR );
2058 }
2059 else {
2060 return addFloat32Sigs( a, b, aSign STATUS_VAR );
2061 }
2062
2063}
2064
2065/*----------------------------------------------------------------------------
2066| Returns the result of multiplying the single-precision floating-point values
2067| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2068| for Binary Floating-Point Arithmetic.
2069*----------------------------------------------------------------------------*/
2070
2071float32 float32_mul( float32 a, float32 b STATUS_PARAM )
2072{
2073 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002074 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002075 uint32_t aSig, bSig;
2076 uint64_t zSig64;
2077 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00002078
Peter Maydell37d18662011-01-06 19:37:53 +00002079 a = float32_squash_input_denormal(a STATUS_VAR);
2080 b = float32_squash_input_denormal(b STATUS_VAR);
2081
bellard158142c2005-03-13 16:54:06 +00002082 aSig = extractFloat32Frac( a );
2083 aExp = extractFloat32Exp( a );
2084 aSign = extractFloat32Sign( a );
2085 bSig = extractFloat32Frac( b );
2086 bExp = extractFloat32Exp( b );
2087 bSign = extractFloat32Sign( b );
2088 zSign = aSign ^ bSign;
2089 if ( aExp == 0xFF ) {
2090 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2091 return propagateFloat32NaN( a, b STATUS_VAR );
2092 }
2093 if ( ( bExp | bSig ) == 0 ) {
2094 float_raise( float_flag_invalid STATUS_VAR);
2095 return float32_default_nan;
2096 }
2097 return packFloat32( zSign, 0xFF, 0 );
2098 }
2099 if ( bExp == 0xFF ) {
2100 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2101 if ( ( aExp | aSig ) == 0 ) {
2102 float_raise( float_flag_invalid STATUS_VAR);
2103 return float32_default_nan;
2104 }
2105 return packFloat32( zSign, 0xFF, 0 );
2106 }
2107 if ( aExp == 0 ) {
2108 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2109 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2110 }
2111 if ( bExp == 0 ) {
2112 if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
2113 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2114 }
2115 zExp = aExp + bExp - 0x7F;
2116 aSig = ( aSig | 0x00800000 )<<7;
2117 bSig = ( bSig | 0x00800000 )<<8;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002118 shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
bellard158142c2005-03-13 16:54:06 +00002119 zSig = zSig64;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002120 if ( 0 <= (int32_t) ( zSig<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00002121 zSig <<= 1;
2122 --zExp;
2123 }
2124 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2125
2126}
2127
2128/*----------------------------------------------------------------------------
2129| Returns the result of dividing the single-precision floating-point value `a'
2130| by the corresponding value `b'. The operation is performed according to the
2131| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2132*----------------------------------------------------------------------------*/
2133
2134float32 float32_div( float32 a, float32 b STATUS_PARAM )
2135{
2136 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002137 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002138 uint32_t aSig, bSig, zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002139 a = float32_squash_input_denormal(a STATUS_VAR);
2140 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002141
2142 aSig = extractFloat32Frac( a );
2143 aExp = extractFloat32Exp( a );
2144 aSign = extractFloat32Sign( a );
2145 bSig = extractFloat32Frac( b );
2146 bExp = extractFloat32Exp( b );
2147 bSign = extractFloat32Sign( b );
2148 zSign = aSign ^ bSign;
2149 if ( aExp == 0xFF ) {
2150 if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2151 if ( bExp == 0xFF ) {
2152 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2153 float_raise( float_flag_invalid STATUS_VAR);
2154 return float32_default_nan;
2155 }
2156 return packFloat32( zSign, 0xFF, 0 );
2157 }
2158 if ( bExp == 0xFF ) {
2159 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2160 return packFloat32( zSign, 0, 0 );
2161 }
2162 if ( bExp == 0 ) {
2163 if ( bSig == 0 ) {
2164 if ( ( aExp | aSig ) == 0 ) {
2165 float_raise( float_flag_invalid STATUS_VAR);
2166 return float32_default_nan;
2167 }
2168 float_raise( float_flag_divbyzero STATUS_VAR);
2169 return packFloat32( zSign, 0xFF, 0 );
2170 }
2171 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2172 }
2173 if ( aExp == 0 ) {
2174 if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2175 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2176 }
2177 zExp = aExp - bExp + 0x7D;
2178 aSig = ( aSig | 0x00800000 )<<7;
2179 bSig = ( bSig | 0x00800000 )<<8;
2180 if ( bSig <= ( aSig + aSig ) ) {
2181 aSig >>= 1;
2182 ++zExp;
2183 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002184 zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard158142c2005-03-13 16:54:06 +00002185 if ( ( zSig & 0x3F ) == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002186 zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
bellard158142c2005-03-13 16:54:06 +00002187 }
2188 return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
2189
2190}
2191
2192/*----------------------------------------------------------------------------
2193| Returns the remainder of the single-precision floating-point value `a'
2194| with respect to the corresponding value `b'. The operation is performed
2195| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2196*----------------------------------------------------------------------------*/
2197
2198float32 float32_rem( float32 a, float32 b STATUS_PARAM )
2199{
Blue Swirled086f32010-03-07 13:49:58 +00002200 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002201 int_fast16_t aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002202 uint32_t aSig, bSig;
2203 uint32_t q;
2204 uint64_t aSig64, bSig64, q64;
2205 uint32_t alternateASig;
2206 int32_t sigMean;
Peter Maydell37d18662011-01-06 19:37:53 +00002207 a = float32_squash_input_denormal(a STATUS_VAR);
2208 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002209
2210 aSig = extractFloat32Frac( a );
2211 aExp = extractFloat32Exp( a );
2212 aSign = extractFloat32Sign( a );
2213 bSig = extractFloat32Frac( b );
2214 bExp = extractFloat32Exp( b );
bellard158142c2005-03-13 16:54:06 +00002215 if ( aExp == 0xFF ) {
2216 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2217 return propagateFloat32NaN( a, b STATUS_VAR );
2218 }
2219 float_raise( float_flag_invalid STATUS_VAR);
2220 return float32_default_nan;
2221 }
2222 if ( bExp == 0xFF ) {
2223 if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
2224 return a;
2225 }
2226 if ( bExp == 0 ) {
2227 if ( bSig == 0 ) {
2228 float_raise( float_flag_invalid STATUS_VAR);
2229 return float32_default_nan;
2230 }
2231 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2232 }
2233 if ( aExp == 0 ) {
2234 if ( aSig == 0 ) return a;
2235 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2236 }
2237 expDiff = aExp - bExp;
2238 aSig |= 0x00800000;
2239 bSig |= 0x00800000;
2240 if ( expDiff < 32 ) {
2241 aSig <<= 8;
2242 bSig <<= 8;
2243 if ( expDiff < 0 ) {
2244 if ( expDiff < -1 ) return a;
2245 aSig >>= 1;
2246 }
2247 q = ( bSig <= aSig );
2248 if ( q ) aSig -= bSig;
2249 if ( 0 < expDiff ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002250 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
bellard158142c2005-03-13 16:54:06 +00002251 q >>= 32 - expDiff;
2252 bSig >>= 2;
2253 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2254 }
2255 else {
2256 aSig >>= 2;
2257 bSig >>= 2;
2258 }
2259 }
2260 else {
2261 if ( bSig <= aSig ) aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002262 aSig64 = ( (uint64_t) aSig )<<40;
2263 bSig64 = ( (uint64_t) bSig )<<40;
bellard158142c2005-03-13 16:54:06 +00002264 expDiff -= 64;
2265 while ( 0 < expDiff ) {
2266 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2267 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2268 aSig64 = - ( ( bSig * q64 )<<38 );
2269 expDiff -= 62;
2270 }
2271 expDiff += 64;
2272 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2273 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2274 q = q64>>( 64 - expDiff );
2275 bSig <<= 6;
2276 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2277 }
2278 do {
2279 alternateASig = aSig;
2280 ++q;
2281 aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002282 } while ( 0 <= (int32_t) aSig );
bellard158142c2005-03-13 16:54:06 +00002283 sigMean = aSig + alternateASig;
2284 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2285 aSig = alternateASig;
2286 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01002287 zSign = ( (int32_t) aSig < 0 );
bellard158142c2005-03-13 16:54:06 +00002288 if ( zSign ) aSig = - aSig;
2289 return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
2290
2291}
2292
2293/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00002294| Returns the result of multiplying the single-precision floating-point values
2295| `a' and `b' then adding 'c', with no intermediate rounding step after the
2296| multiplication. The operation is performed according to the IEC/IEEE
2297| Standard for Binary Floating-Point Arithmetic 754-2008.
2298| The flags argument allows the caller to select negation of the
2299| addend, the intermediate product, or the final result. (The difference
2300| between this and having the caller do a separate negation is that negating
2301| externally will flip the sign bit on NaNs.)
2302*----------------------------------------------------------------------------*/
2303
2304float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
2305{
2306 flag aSign, bSign, cSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002307 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell369be8f2011-10-19 16:14:06 +00002308 uint32_t aSig, bSig, cSig;
2309 flag pInf, pZero, pSign;
2310 uint64_t pSig64, cSig64, zSig64;
2311 uint32_t pSig;
2312 int shiftcount;
2313 flag signflip, infzero;
2314
2315 a = float32_squash_input_denormal(a STATUS_VAR);
2316 b = float32_squash_input_denormal(b STATUS_VAR);
2317 c = float32_squash_input_denormal(c STATUS_VAR);
2318 aSig = extractFloat32Frac(a);
2319 aExp = extractFloat32Exp(a);
2320 aSign = extractFloat32Sign(a);
2321 bSig = extractFloat32Frac(b);
2322 bExp = extractFloat32Exp(b);
2323 bSign = extractFloat32Sign(b);
2324 cSig = extractFloat32Frac(c);
2325 cExp = extractFloat32Exp(c);
2326 cSign = extractFloat32Sign(c);
2327
2328 infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
2329 (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
2330
2331 /* It is implementation-defined whether the cases of (0,inf,qnan)
2332 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
2333 * they return if they do), so we have to hand this information
2334 * off to the target-specific pick-a-NaN routine.
2335 */
2336 if (((aExp == 0xff) && aSig) ||
2337 ((bExp == 0xff) && bSig) ||
2338 ((cExp == 0xff) && cSig)) {
2339 return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
2340 }
2341
2342 if (infzero) {
2343 float_raise(float_flag_invalid STATUS_VAR);
2344 return float32_default_nan;
2345 }
2346
2347 if (flags & float_muladd_negate_c) {
2348 cSign ^= 1;
2349 }
2350
2351 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
2352
2353 /* Work out the sign and type of the product */
2354 pSign = aSign ^ bSign;
2355 if (flags & float_muladd_negate_product) {
2356 pSign ^= 1;
2357 }
2358 pInf = (aExp == 0xff) || (bExp == 0xff);
2359 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
2360
2361 if (cExp == 0xff) {
2362 if (pInf && (pSign ^ cSign)) {
2363 /* addition of opposite-signed infinities => InvalidOperation */
2364 float_raise(float_flag_invalid STATUS_VAR);
2365 return float32_default_nan;
2366 }
2367 /* Otherwise generate an infinity of the same sign */
2368 return packFloat32(cSign ^ signflip, 0xff, 0);
2369 }
2370
2371 if (pInf) {
2372 return packFloat32(pSign ^ signflip, 0xff, 0);
2373 }
2374
2375 if (pZero) {
2376 if (cExp == 0) {
2377 if (cSig == 0) {
2378 /* Adding two exact zeroes */
2379 if (pSign == cSign) {
2380 zSign = pSign;
2381 } else if (STATUS(float_rounding_mode) == float_round_down) {
2382 zSign = 1;
2383 } else {
2384 zSign = 0;
2385 }
2386 return packFloat32(zSign ^ signflip, 0, 0);
2387 }
2388 /* Exact zero plus a denorm */
2389 if (STATUS(flush_to_zero)) {
2390 float_raise(float_flag_output_denormal STATUS_VAR);
2391 return packFloat32(cSign ^ signflip, 0, 0);
2392 }
2393 }
2394 /* Zero plus something non-zero : just return the something */
Peter Maydell67d43532014-02-20 10:35:50 +00002395 if (flags & float_muladd_halve_result) {
2396 if (cExp == 0) {
2397 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2398 }
2399 /* Subtract one to halve, and one again because roundAndPackFloat32
2400 * wants one less than the true exponent.
2401 */
2402 cExp -= 2;
2403 cSig = (cSig | 0x00800000) << 7;
2404 return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR);
2405 }
Richard Sandiforda6e7c182013-01-22 17:03:05 +00002406 return packFloat32(cSign ^ signflip, cExp, cSig);
Peter Maydell369be8f2011-10-19 16:14:06 +00002407 }
2408
2409 if (aExp == 0) {
2410 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
2411 }
2412 if (bExp == 0) {
2413 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
2414 }
2415
2416 /* Calculate the actual result a * b + c */
2417
2418 /* Multiply first; this is easy. */
2419 /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
2420 * because we want the true exponent, not the "one-less-than"
2421 * flavour that roundAndPackFloat32() takes.
2422 */
2423 pExp = aExp + bExp - 0x7e;
2424 aSig = (aSig | 0x00800000) << 7;
2425 bSig = (bSig | 0x00800000) << 8;
2426 pSig64 = (uint64_t)aSig * bSig;
2427 if ((int64_t)(pSig64 << 1) >= 0) {
2428 pSig64 <<= 1;
2429 pExp--;
2430 }
2431
2432 zSign = pSign ^ signflip;
2433
2434 /* Now pSig64 is the significand of the multiply, with the explicit bit in
2435 * position 62.
2436 */
2437 if (cExp == 0) {
2438 if (!cSig) {
2439 /* Throw out the special case of c being an exact zero now */
2440 shift64RightJamming(pSig64, 32, &pSig64);
2441 pSig = pSig64;
Peter Maydell67d43532014-02-20 10:35:50 +00002442 if (flags & float_muladd_halve_result) {
2443 pExp--;
2444 }
Peter Maydell369be8f2011-10-19 16:14:06 +00002445 return roundAndPackFloat32(zSign, pExp - 1,
2446 pSig STATUS_VAR);
2447 }
2448 normalizeFloat32Subnormal(cSig, &cExp, &cSig);
2449 }
2450
2451 cSig64 = (uint64_t)cSig << (62 - 23);
2452 cSig64 |= LIT64(0x4000000000000000);
2453 expDiff = pExp - cExp;
2454
2455 if (pSign == cSign) {
2456 /* Addition */
2457 if (expDiff > 0) {
2458 /* scale c to match p */
2459 shift64RightJamming(cSig64, expDiff, &cSig64);
2460 zExp = pExp;
2461 } else if (expDiff < 0) {
2462 /* scale p to match c */
2463 shift64RightJamming(pSig64, -expDiff, &pSig64);
2464 zExp = cExp;
2465 } else {
2466 /* no scaling needed */
2467 zExp = cExp;
2468 }
2469 /* Add significands and make sure explicit bit ends up in posn 62 */
2470 zSig64 = pSig64 + cSig64;
2471 if ((int64_t)zSig64 < 0) {
2472 shift64RightJamming(zSig64, 1, &zSig64);
2473 } else {
2474 zExp--;
2475 }
2476 } else {
2477 /* Subtraction */
2478 if (expDiff > 0) {
2479 shift64RightJamming(cSig64, expDiff, &cSig64);
2480 zSig64 = pSig64 - cSig64;
2481 zExp = pExp;
2482 } else if (expDiff < 0) {
2483 shift64RightJamming(pSig64, -expDiff, &pSig64);
2484 zSig64 = cSig64 - pSig64;
2485 zExp = cExp;
2486 zSign ^= 1;
2487 } else {
2488 zExp = pExp;
2489 if (cSig64 < pSig64) {
2490 zSig64 = pSig64 - cSig64;
2491 } else if (pSig64 < cSig64) {
2492 zSig64 = cSig64 - pSig64;
2493 zSign ^= 1;
2494 } else {
2495 /* Exact zero */
2496 zSign = signflip;
2497 if (STATUS(float_rounding_mode) == float_round_down) {
2498 zSign ^= 1;
2499 }
2500 return packFloat32(zSign, 0, 0);
2501 }
2502 }
2503 --zExp;
2504 /* Normalize to put the explicit bit back into bit 62. */
2505 shiftcount = countLeadingZeros64(zSig64) - 1;
2506 zSig64 <<= shiftcount;
2507 zExp -= shiftcount;
2508 }
Peter Maydell67d43532014-02-20 10:35:50 +00002509 if (flags & float_muladd_halve_result) {
2510 zExp--;
2511 }
2512
Peter Maydell369be8f2011-10-19 16:14:06 +00002513 shift64RightJamming(zSig64, 32, &zSig64);
2514 return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
2515}
2516
2517
2518/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002519| Returns the square root of the single-precision floating-point value `a'.
2520| The operation is performed according to the IEC/IEEE Standard for Binary
2521| Floating-Point Arithmetic.
2522*----------------------------------------------------------------------------*/
2523
2524float32 float32_sqrt( float32 a STATUS_PARAM )
2525{
2526 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002527 int_fast16_t aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002528 uint32_t aSig, zSig;
2529 uint64_t rem, term;
Peter Maydell37d18662011-01-06 19:37:53 +00002530 a = float32_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002531
2532 aSig = extractFloat32Frac( a );
2533 aExp = extractFloat32Exp( a );
2534 aSign = extractFloat32Sign( a );
2535 if ( aExp == 0xFF ) {
pbrookf090c9d2007-11-18 14:33:24 +00002536 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00002537 if ( ! aSign ) return a;
2538 float_raise( float_flag_invalid STATUS_VAR);
2539 return float32_default_nan;
2540 }
2541 if ( aSign ) {
2542 if ( ( aExp | aSig ) == 0 ) return a;
2543 float_raise( float_flag_invalid STATUS_VAR);
2544 return float32_default_nan;
2545 }
2546 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00002547 if ( aSig == 0 ) return float32_zero;
bellard158142c2005-03-13 16:54:06 +00002548 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2549 }
2550 zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2551 aSig = ( aSig | 0x00800000 )<<8;
2552 zSig = estimateSqrt32( aExp, aSig ) + 2;
2553 if ( ( zSig & 0x7F ) <= 5 ) {
2554 if ( zSig < 2 ) {
2555 zSig = 0x7FFFFFFF;
2556 goto roundAndPack;
2557 }
2558 aSig >>= aExp & 1;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002559 term = ( (uint64_t) zSig ) * zSig;
2560 rem = ( ( (uint64_t) aSig )<<32 ) - term;
2561 while ( (int64_t) rem < 0 ) {
bellard158142c2005-03-13 16:54:06 +00002562 --zSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002563 rem += ( ( (uint64_t) zSig )<<1 ) | 1;
bellard158142c2005-03-13 16:54:06 +00002564 }
2565 zSig |= ( rem != 0 );
2566 }
2567 shift32RightJamming( zSig, 1, &zSig );
2568 roundAndPack:
2569 return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
2570
2571}
2572
2573/*----------------------------------------------------------------------------
Aurelien Jarno8229c992009-02-05 12:04:05 +01002574| Returns the binary exponential of the single-precision floating-point value
2575| `a'. The operation is performed according to the IEC/IEEE Standard for
2576| Binary Floating-Point Arithmetic.
2577|
2578| Uses the following identities:
2579|
2580| 1. -------------------------------------------------------------------------
2581| x x*ln(2)
2582| 2 = e
2583|
2584| 2. -------------------------------------------------------------------------
2585| 2 3 4 5 n
2586| x x x x x x x
2587| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
2588| 1! 2! 3! 4! 5! n!
2589*----------------------------------------------------------------------------*/
2590
2591static const float64 float32_exp2_coefficients[15] =
2592{
Peter Maydelld5138cf2011-02-10 13:59:34 +00002593 const_float64( 0x3ff0000000000000ll ), /* 1 */
2594 const_float64( 0x3fe0000000000000ll ), /* 2 */
2595 const_float64( 0x3fc5555555555555ll ), /* 3 */
2596 const_float64( 0x3fa5555555555555ll ), /* 4 */
2597 const_float64( 0x3f81111111111111ll ), /* 5 */
2598 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
2599 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
2600 const_float64( 0x3efa01a01a01a01all ), /* 8 */
2601 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
2602 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
2603 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
2604 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
2605 const_float64( 0x3de6124613a86d09ll ), /* 13 */
2606 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
2607 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
Aurelien Jarno8229c992009-02-05 12:04:05 +01002608};
2609
2610float32 float32_exp2( float32 a STATUS_PARAM )
2611{
2612 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002613 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002614 uint32_t aSig;
Aurelien Jarno8229c992009-02-05 12:04:05 +01002615 float64 r, x, xn;
2616 int i;
Peter Maydell37d18662011-01-06 19:37:53 +00002617 a = float32_squash_input_denormal(a STATUS_VAR);
Aurelien Jarno8229c992009-02-05 12:04:05 +01002618
2619 aSig = extractFloat32Frac( a );
2620 aExp = extractFloat32Exp( a );
2621 aSign = extractFloat32Sign( a );
2622
2623 if ( aExp == 0xFF) {
2624 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2625 return (aSign) ? float32_zero : a;
2626 }
2627 if (aExp == 0) {
2628 if (aSig == 0) return float32_one;
2629 }
2630
2631 float_raise( float_flag_inexact STATUS_VAR);
2632
2633 /* ******************************* */
2634 /* using float64 for approximation */
2635 /* ******************************* */
2636 x = float32_to_float64(a STATUS_VAR);
2637 x = float64_mul(x, float64_ln2 STATUS_VAR);
2638
2639 xn = x;
2640 r = float64_one;
2641 for (i = 0 ; i < 15 ; i++) {
2642 float64 f;
2643
2644 f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
2645 r = float64_add(r, f STATUS_VAR);
2646
2647 xn = float64_mul(xn, x STATUS_VAR);
2648 }
2649
2650 return float64_to_float32(r, status);
2651}
2652
2653/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00002654| Returns the binary log of the single-precision floating-point value `a'.
2655| The operation is performed according to the IEC/IEEE Standard for Binary
2656| Floating-Point Arithmetic.
2657*----------------------------------------------------------------------------*/
2658float32 float32_log2( float32 a STATUS_PARAM )
2659{
2660 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002661 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002662 uint32_t aSig, zSig, i;
aurel32374dfc32009-02-05 13:42:47 +00002663
Peter Maydell37d18662011-01-06 19:37:53 +00002664 a = float32_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00002665 aSig = extractFloat32Frac( a );
2666 aExp = extractFloat32Exp( a );
2667 aSign = extractFloat32Sign( a );
2668
2669 if ( aExp == 0 ) {
2670 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
2671 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2672 }
2673 if ( aSign ) {
2674 float_raise( float_flag_invalid STATUS_VAR);
2675 return float32_default_nan;
2676 }
2677 if ( aExp == 0xFF ) {
2678 if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
2679 return a;
2680 }
2681
2682 aExp -= 0x7F;
2683 aSig |= 0x00800000;
2684 zSign = aExp < 0;
2685 zSig = aExp << 23;
2686
2687 for (i = 1 << 22; i > 0; i >>= 1) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01002688 aSig = ( (uint64_t)aSig * aSig ) >> 23;
aurel32374dfc32009-02-05 13:42:47 +00002689 if ( aSig & 0x01000000 ) {
2690 aSig >>= 1;
2691 zSig |= i;
2692 }
2693 }
2694
2695 if ( zSign )
2696 zSig = -zSig;
2697
2698 return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
2699}
2700
2701/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002702| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02002703| the corresponding value `b', and 0 otherwise. The invalid exception is
2704| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00002705| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2706*----------------------------------------------------------------------------*/
2707
Aurelien Jarnob6893622011-04-14 00:49:29 +02002708int float32_eq( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002709{
Aurelien Jarnob6893622011-04-14 00:49:29 +02002710 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002711 a = float32_squash_input_denormal(a STATUS_VAR);
2712 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002713
2714 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2715 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2716 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002717 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002718 return 0;
2719 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002720 av = float32_val(a);
2721 bv = float32_val(b);
2722 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002723}
2724
2725/*----------------------------------------------------------------------------
2726| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002727| or equal to the corresponding value `b', and 0 otherwise. The invalid
2728| exception is raised if either operand is a NaN. The comparison is performed
2729| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002730*----------------------------------------------------------------------------*/
2731
bellard750afe92006-10-28 19:27:11 +00002732int float32_le( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002733{
2734 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002735 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002736 a = float32_squash_input_denormal(a STATUS_VAR);
2737 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002738
2739 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2740 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2741 ) {
2742 float_raise( float_flag_invalid STATUS_VAR);
2743 return 0;
2744 }
2745 aSign = extractFloat32Sign( a );
2746 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002747 av = float32_val(a);
2748 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002749 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002750 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002751
2752}
2753
2754/*----------------------------------------------------------------------------
2755| Returns 1 if the single-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002756| the corresponding value `b', and 0 otherwise. The invalid exception is
2757| raised if either operand is a NaN. The comparison is performed according
2758| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002759*----------------------------------------------------------------------------*/
2760
bellard750afe92006-10-28 19:27:11 +00002761int float32_lt( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002762{
2763 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002764 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002765 a = float32_squash_input_denormal(a STATUS_VAR);
2766 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002767
2768 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2769 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2770 ) {
2771 float_raise( float_flag_invalid STATUS_VAR);
2772 return 0;
2773 }
2774 aSign = extractFloat32Sign( a );
2775 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002776 av = float32_val(a);
2777 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002778 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002779 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002780
2781}
2782
2783/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002784| Returns 1 if the single-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002785| be compared, and 0 otherwise. The invalid exception is raised if either
2786| operand is a NaN. The comparison is performed according to the IEC/IEEE
2787| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02002788*----------------------------------------------------------------------------*/
2789
2790int float32_unordered( float32 a, float32 b STATUS_PARAM )
2791{
2792 a = float32_squash_input_denormal(a STATUS_VAR);
2793 b = float32_squash_input_denormal(b STATUS_VAR);
2794
2795 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2796 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2797 ) {
2798 float_raise( float_flag_invalid STATUS_VAR);
2799 return 1;
2800 }
2801 return 0;
2802}
Aurelien Jarnob6893622011-04-14 00:49:29 +02002803
Aurelien Jarno67b78612011-04-14 00:49:29 +02002804/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002805| Returns 1 if the single-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02002806| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2807| exception. The comparison is performed according to the IEC/IEEE Standard
2808| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00002809*----------------------------------------------------------------------------*/
2810
Aurelien Jarnob6893622011-04-14 00:49:29 +02002811int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002812{
Peter Maydell37d18662011-01-06 19:37:53 +00002813 a = float32_squash_input_denormal(a STATUS_VAR);
2814 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002815
2816 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2817 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2818 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02002819 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2820 float_raise( float_flag_invalid STATUS_VAR);
2821 }
bellard158142c2005-03-13 16:54:06 +00002822 return 0;
2823 }
Aurelien Jarnob6893622011-04-14 00:49:29 +02002824 return ( float32_val(a) == float32_val(b) ) ||
2825 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00002826}
2827
2828/*----------------------------------------------------------------------------
2829| Returns 1 if the single-precision floating-point value `a' is less than or
2830| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
2831| cause an exception. Otherwise, the comparison is performed according to the
2832| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2833*----------------------------------------------------------------------------*/
2834
bellard750afe92006-10-28 19:27:11 +00002835int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002836{
2837 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002838 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002839 a = float32_squash_input_denormal(a STATUS_VAR);
2840 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002841
2842 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2843 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2844 ) {
2845 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2846 float_raise( float_flag_invalid STATUS_VAR);
2847 }
2848 return 0;
2849 }
2850 aSign = extractFloat32Sign( a );
2851 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002852 av = float32_val(a);
2853 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002854 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002855 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002856
2857}
2858
2859/*----------------------------------------------------------------------------
2860| Returns 1 if the single-precision floating-point value `a' is less than
2861| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
2862| exception. Otherwise, the comparison is performed according to the IEC/IEEE
2863| Standard for Binary Floating-Point Arithmetic.
2864*----------------------------------------------------------------------------*/
2865
bellard750afe92006-10-28 19:27:11 +00002866int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00002867{
2868 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002869 uint32_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00002870 a = float32_squash_input_denormal(a STATUS_VAR);
2871 b = float32_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002872
2873 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2874 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2875 ) {
2876 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2877 float_raise( float_flag_invalid STATUS_VAR);
2878 }
2879 return 0;
2880 }
2881 aSign = extractFloat32Sign( a );
2882 bSign = extractFloat32Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00002883 av = float32_val(a);
2884 bv = float32_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002885 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00002886 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00002887
2888}
2889
2890/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02002891| Returns 1 if the single-precision floating-point values `a' and `b' cannot
2892| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
2893| comparison is performed according to the IEC/IEEE Standard for Binary
2894| Floating-Point Arithmetic.
2895*----------------------------------------------------------------------------*/
2896
2897int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
2898{
2899 a = float32_squash_input_denormal(a STATUS_VAR);
2900 b = float32_squash_input_denormal(b STATUS_VAR);
2901
2902 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2903 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2904 ) {
2905 if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2906 float_raise( float_flag_invalid STATUS_VAR);
2907 }
2908 return 1;
2909 }
2910 return 0;
2911}
2912
2913/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00002914| Returns the result of converting the double-precision floating-point value
2915| `a' to the 32-bit two's complement integer format. The conversion is
2916| performed according to the IEC/IEEE Standard for Binary Floating-Point
2917| Arithmetic---which means in particular that the conversion is rounded
2918| according to the current rounding mode. If `a' is a NaN, the largest
2919| positive integer is returned. Otherwise, if the conversion overflows, the
2920| largest integer with the same sign as `a' is returned.
2921*----------------------------------------------------------------------------*/
2922
2923int32 float64_to_int32( float64 a STATUS_PARAM )
2924{
2925 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002926 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002927 uint64_t aSig;
Peter Maydell37d18662011-01-06 19:37:53 +00002928 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002929
2930 aSig = extractFloat64Frac( a );
2931 aExp = extractFloat64Exp( a );
2932 aSign = extractFloat64Sign( a );
2933 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2934 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2935 shiftCount = 0x42C - aExp;
2936 if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2937 return roundAndPackInt32( aSign, aSig STATUS_VAR );
2938
2939}
2940
2941/*----------------------------------------------------------------------------
2942| Returns the result of converting the double-precision floating-point value
2943| `a' to the 32-bit two's complement integer format. The conversion is
2944| performed according to the IEC/IEEE Standard for Binary Floating-Point
2945| Arithmetic, except that the conversion is always rounded toward zero.
2946| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2947| the conversion overflows, the largest integer with the same sign as `a' is
2948| returned.
2949*----------------------------------------------------------------------------*/
2950
2951int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
2952{
2953 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02002954 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01002955 uint64_t aSig, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01002956 int32_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00002957 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00002958
2959 aSig = extractFloat64Frac( a );
2960 aExp = extractFloat64Exp( a );
2961 aSign = extractFloat64Sign( a );
2962 if ( 0x41E < aExp ) {
2963 if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2964 goto invalid;
2965 }
2966 else if ( aExp < 0x3FF ) {
2967 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
2968 return 0;
2969 }
2970 aSig |= LIT64( 0x0010000000000000 );
2971 shiftCount = 0x433 - aExp;
2972 savedASig = aSig;
2973 aSig >>= shiftCount;
2974 z = aSig;
2975 if ( aSign ) z = - z;
2976 if ( ( z < 0 ) ^ aSign ) {
2977 invalid:
2978 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01002979 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00002980 }
2981 if ( ( aSig<<shiftCount ) != savedASig ) {
2982 STATUS(float_exception_flags) |= float_flag_inexact;
2983 }
2984 return z;
2985
2986}
2987
2988/*----------------------------------------------------------------------------
2989| Returns the result of converting the double-precision floating-point value
Peter Maydellcbcef452010-12-07 15:37:34 +00002990| `a' to the 16-bit two's complement integer format. The conversion is
2991| performed according to the IEC/IEEE Standard for Binary Floating-Point
2992| Arithmetic, except that the conversion is always rounded toward zero.
2993| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
2994| the conversion overflows, the largest integer with the same sign as `a' is
2995| returned.
2996*----------------------------------------------------------------------------*/
2997
Andreas Färber94a49d82012-04-26 00:15:56 +02002998int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00002999{
3000 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003001 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003002 uint64_t aSig, savedASig;
Peter Maydellcbcef452010-12-07 15:37:34 +00003003 int32 z;
3004
3005 aSig = extractFloat64Frac( a );
3006 aExp = extractFloat64Exp( a );
3007 aSign = extractFloat64Sign( a );
3008 if ( 0x40E < aExp ) {
3009 if ( ( aExp == 0x7FF ) && aSig ) {
3010 aSign = 0;
3011 }
3012 goto invalid;
3013 }
3014 else if ( aExp < 0x3FF ) {
3015 if ( aExp || aSig ) {
3016 STATUS(float_exception_flags) |= float_flag_inexact;
3017 }
3018 return 0;
3019 }
3020 aSig |= LIT64( 0x0010000000000000 );
3021 shiftCount = 0x433 - aExp;
3022 savedASig = aSig;
3023 aSig >>= shiftCount;
3024 z = aSig;
3025 if ( aSign ) {
3026 z = - z;
3027 }
3028 if ( ( (int16_t)z < 0 ) ^ aSign ) {
3029 invalid:
3030 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01003031 return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
Peter Maydellcbcef452010-12-07 15:37:34 +00003032 }
3033 if ( ( aSig<<shiftCount ) != savedASig ) {
3034 STATUS(float_exception_flags) |= float_flag_inexact;
3035 }
3036 return z;
3037}
3038
3039/*----------------------------------------------------------------------------
3040| Returns the result of converting the double-precision floating-point value
bellard158142c2005-03-13 16:54:06 +00003041| `a' to the 64-bit two's complement integer format. The conversion is
3042| performed according to the IEC/IEEE Standard for Binary Floating-Point
3043| Arithmetic---which means in particular that the conversion is rounded
3044| according to the current rounding mode. If `a' is a NaN, the largest
3045| positive integer is returned. Otherwise, if the conversion overflows, the
3046| largest integer with the same sign as `a' is returned.
3047*----------------------------------------------------------------------------*/
3048
3049int64 float64_to_int64( float64 a STATUS_PARAM )
3050{
3051 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003052 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003053 uint64_t aSig, aSigExtra;
Peter Maydell37d18662011-01-06 19:37:53 +00003054 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003055
3056 aSig = extractFloat64Frac( a );
3057 aExp = extractFloat64Exp( a );
3058 aSign = extractFloat64Sign( a );
3059 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3060 shiftCount = 0x433 - aExp;
3061 if ( shiftCount <= 0 ) {
3062 if ( 0x43E < aExp ) {
3063 float_raise( float_flag_invalid STATUS_VAR);
3064 if ( ! aSign
3065 || ( ( aExp == 0x7FF )
3066 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3067 ) {
3068 return LIT64( 0x7FFFFFFFFFFFFFFF );
3069 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01003070 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00003071 }
3072 aSigExtra = 0;
3073 aSig <<= - shiftCount;
3074 }
3075 else {
3076 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3077 }
3078 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
3079
3080}
3081
3082/*----------------------------------------------------------------------------
3083| Returns the result of converting the double-precision floating-point value
3084| `a' to the 64-bit two's complement integer format. The conversion is
3085| performed according to the IEC/IEEE Standard for Binary Floating-Point
3086| Arithmetic, except that the conversion is always rounded toward zero.
3087| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
3088| the conversion overflows, the largest integer with the same sign as `a' is
3089| returned.
3090*----------------------------------------------------------------------------*/
3091
3092int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
3093{
3094 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003095 int_fast16_t aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003096 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00003097 int64 z;
Peter Maydell37d18662011-01-06 19:37:53 +00003098 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003099
3100 aSig = extractFloat64Frac( a );
3101 aExp = extractFloat64Exp( a );
3102 aSign = extractFloat64Sign( a );
3103 if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
3104 shiftCount = aExp - 0x433;
3105 if ( 0 <= shiftCount ) {
3106 if ( 0x43E <= aExp ) {
pbrookf090c9d2007-11-18 14:33:24 +00003107 if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
bellard158142c2005-03-13 16:54:06 +00003108 float_raise( float_flag_invalid STATUS_VAR);
3109 if ( ! aSign
3110 || ( ( aExp == 0x7FF )
3111 && ( aSig != LIT64( 0x0010000000000000 ) ) )
3112 ) {
3113 return LIT64( 0x7FFFFFFFFFFFFFFF );
3114 }
3115 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01003116 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00003117 }
3118 z = aSig<<shiftCount;
3119 }
3120 else {
3121 if ( aExp < 0x3FE ) {
3122 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
3123 return 0;
3124 }
3125 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003126 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00003127 STATUS(float_exception_flags) |= float_flag_inexact;
3128 }
3129 }
3130 if ( aSign ) z = - z;
3131 return z;
3132
3133}
3134
3135/*----------------------------------------------------------------------------
3136| Returns the result of converting the double-precision floating-point value
3137| `a' to the single-precision floating-point format. The conversion is
3138| performed according to the IEC/IEEE Standard for Binary Floating-Point
3139| Arithmetic.
3140*----------------------------------------------------------------------------*/
3141
3142float32 float64_to_float32( float64 a STATUS_PARAM )
3143{
3144 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003145 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003146 uint64_t aSig;
3147 uint32_t zSig;
Peter Maydell37d18662011-01-06 19:37:53 +00003148 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003149
3150 aSig = extractFloat64Frac( a );
3151 aExp = extractFloat64Exp( a );
3152 aSign = extractFloat64Sign( a );
3153 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003154 if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003155 return packFloat32( aSign, 0xFF, 0 );
3156 }
3157 shift64RightJamming( aSig, 22, &aSig );
3158 zSig = aSig;
3159 if ( aExp || zSig ) {
3160 zSig |= 0x40000000;
3161 aExp -= 0x381;
3162 }
3163 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
3164
3165}
3166
Paul Brook60011492009-11-19 16:45:20 +00003167
3168/*----------------------------------------------------------------------------
3169| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3170| half-precision floating-point value, returning the result. After being
3171| shifted into the proper positions, the three fields are simply added
3172| together to form the result. This means that any integer portion of `zSig'
3173| will be added into the exponent. Since a properly normalized significand
3174| will have an integer portion equal to 1, the `zExp' input should be 1 less
3175| than the desired result exponent whenever `zSig' is a complete, normalized
3176| significand.
3177*----------------------------------------------------------------------------*/
Andreas Färber94a49d82012-04-26 00:15:56 +02003178static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
Paul Brook60011492009-11-19 16:45:20 +00003179{
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003180 return make_float16(
Andreas Färberbb98fe42011-03-07 01:34:06 +01003181 (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
Paul Brook60011492009-11-19 16:45:20 +00003182}
3183
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003184/*----------------------------------------------------------------------------
3185| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3186| and significand `zSig', and returns the proper half-precision floating-
3187| point value corresponding to the abstract input. Ordinarily, the abstract
3188| value is simply rounded and packed into the half-precision format, with
3189| the inexact exception raised if the abstract input cannot be represented
3190| exactly. However, if the abstract value is too large, the overflow and
3191| inexact exceptions are raised and an infinity or maximal finite value is
3192| returned. If the abstract value is too small, the input value is rounded to
3193| a subnormal number, and the underflow and inexact exceptions are raised if
3194| the abstract input cannot be represented exactly as a subnormal half-
3195| precision floating-point number.
3196| The `ieee' flag indicates whether to use IEEE standard half precision, or
3197| ARM-style "alternative representation", which omits the NaN and Inf
3198| encodings in order to raise the maximum representable exponent by one.
3199| The input significand `zSig' has its binary point between bits 22
3200| and 23, which is 13 bits to the left of the usual location. This shifted
3201| significand must be normalized or smaller. If `zSig' is not normalized,
3202| `zExp' must be 0; in that case, the result returned is a subnormal number,
3203| and it must not require rounding. In the usual case that `zSig' is
3204| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3205| Note the slightly odd position of the binary point in zSig compared with the
3206| other roundAndPackFloat functions. This should probably be fixed if we
3207| need to implement more float16 routines than just conversion.
3208| The handling of underflow and overflow follows the IEC/IEEE Standard for
3209| Binary Floating-Point Arithmetic.
3210*----------------------------------------------------------------------------*/
3211
3212static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp,
3213 uint32_t zSig, flag ieee STATUS_PARAM)
3214{
3215 int maxexp = ieee ? 29 : 30;
3216 uint32_t mask;
3217 uint32_t increment;
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003218 bool rounding_bumps_exp;
3219 bool is_tiny = false;
3220
3221 /* Calculate the mask of bits of the mantissa which are not
3222 * representable in half-precision and will be lost.
3223 */
3224 if (zExp < 1) {
3225 /* Will be denormal in halfprec */
3226 mask = 0x00ffffff;
3227 if (zExp >= -11) {
3228 mask >>= 11 + zExp;
3229 }
3230 } else {
3231 /* Normal number in halfprec */
3232 mask = 0x00001fff;
3233 }
3234
Peter Maydelldc355b72014-01-07 17:19:12 +00003235 switch (STATUS(float_rounding_mode)) {
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003236 case float_round_nearest_even:
3237 increment = (mask + 1) >> 1;
3238 if ((zSig & mask) == increment) {
3239 increment = zSig & (increment << 1);
3240 }
3241 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00003242 case float_round_ties_away:
3243 increment = (mask + 1) >> 1;
3244 break;
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003245 case float_round_up:
3246 increment = zSign ? 0 : mask;
3247 break;
3248 case float_round_down:
3249 increment = zSign ? mask : 0;
3250 break;
3251 default: /* round_to_zero */
3252 increment = 0;
3253 break;
3254 }
3255
3256 rounding_bumps_exp = (zSig + increment >= 0x01000000);
3257
3258 if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) {
3259 if (ieee) {
3260 float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
3261 return packFloat16(zSign, 0x1f, 0);
3262 } else {
3263 float_raise(float_flag_invalid STATUS_VAR);
3264 return packFloat16(zSign, 0x1f, 0x3ff);
3265 }
3266 }
3267
3268 if (zExp < 0) {
3269 /* Note that flush-to-zero does not affect half-precision results */
3270 is_tiny =
3271 (STATUS(float_detect_tininess) == float_tininess_before_rounding)
3272 || (zExp < -1)
3273 || (!rounding_bumps_exp);
3274 }
3275 if (zSig & mask) {
3276 float_raise(float_flag_inexact STATUS_VAR);
3277 if (is_tiny) {
3278 float_raise(float_flag_underflow STATUS_VAR);
3279 }
3280 }
3281
3282 zSig += increment;
3283 if (rounding_bumps_exp) {
3284 zSig >>= 1;
3285 zExp++;
3286 }
3287
3288 if (zExp < -10) {
3289 return packFloat16(zSign, 0, 0);
3290 }
3291 if (zExp < 0) {
3292 zSig >>= -zExp;
3293 zExp = 0;
3294 }
3295 return packFloat16(zSign, zExp, zSig >> 13);
3296}
3297
/* Normalizes the subnormal half-precision significand `aSig': it is shifted
 * left by (countLeadingZeros32(aSig) - 21) bits and stored through
 * `zSigPtr', and the matching exponent (1 minus that shift count) is stored
 * through `zExpPtr'.
 */
static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr,
                                      uint32_t *zSigPtr)
{
    int8_t lshift;

    lshift = countLeadingZeros32(aSig) - 21;
    *zSigPtr = aSig << lshift;
    *zExpPtr = 1 - lshift;
}
3305
Paul Brook60011492009-11-19 16:45:20 +00003306/* Half precision floats come in two formats: standard IEEE and "ARM" format.
3307 The latter gains extra exponent range by omitting the NaN/Inf encodings. */
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003308
3309float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00003310{
3311 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003312 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003313 uint32_t aSig;
Paul Brook60011492009-11-19 16:45:20 +00003314
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003315 aSign = extractFloat16Sign(a);
3316 aExp = extractFloat16Exp(a);
3317 aSig = extractFloat16Frac(a);
Paul Brook60011492009-11-19 16:45:20 +00003318
3319 if (aExp == 0x1f && ieee) {
3320 if (aSig) {
Peter Maydellf591e1b2011-02-10 11:28:59 +00003321 return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003322 }
Peter Maydell4be8eea2012-09-24 17:28:35 +01003323 return packFloat32(aSign, 0xff, 0);
Paul Brook60011492009-11-19 16:45:20 +00003324 }
3325 if (aExp == 0) {
Paul Brook60011492009-11-19 16:45:20 +00003326 if (aSig == 0) {
3327 return packFloat32(aSign, 0, 0);
3328 }
3329
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003330 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3331 aExp--;
Paul Brook60011492009-11-19 16:45:20 +00003332 }
3333 return packFloat32( aSign, aExp + 0x70, aSig << 13);
3334}
3335
Peter Maydellbb4d4bb2011-02-10 11:28:56 +00003336float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
Paul Brook60011492009-11-19 16:45:20 +00003337{
3338 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003339 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003340 uint32_t aSig;
Peter Maydell38970ef2014-01-06 11:47:21 +00003341
Peter Maydell37d18662011-01-06 19:37:53 +00003342 a = float32_squash_input_denormal(a STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003343
3344 aSig = extractFloat32Frac( a );
3345 aExp = extractFloat32Exp( a );
3346 aSign = extractFloat32Sign( a );
3347 if ( aExp == 0xFF ) {
3348 if (aSig) {
Peter Maydell600e30d2011-02-10 11:28:58 +00003349 /* Input is a NaN */
Peter Maydell600e30d2011-02-10 11:28:58 +00003350 if (!ieee) {
Peter Maydell38970ef2014-01-06 11:47:21 +00003351 float_raise(float_flag_invalid STATUS_VAR);
Peter Maydell600e30d2011-02-10 11:28:58 +00003352 return packFloat16(aSign, 0, 0);
3353 }
Peter Maydell38970ef2014-01-06 11:47:21 +00003354 return commonNaNToFloat16(
3355 float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003356 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003357 /* Infinity */
3358 if (!ieee) {
3359 float_raise(float_flag_invalid STATUS_VAR);
3360 return packFloat16(aSign, 0x1f, 0x3ff);
3361 }
3362 return packFloat16(aSign, 0x1f, 0);
Paul Brook60011492009-11-19 16:45:20 +00003363 }
Peter Maydell600e30d2011-02-10 11:28:58 +00003364 if (aExp == 0 && aSig == 0) {
Paul Brook60011492009-11-19 16:45:20 +00003365 return packFloat16(aSign, 0, 0);
3366 }
Peter Maydell38970ef2014-01-06 11:47:21 +00003367 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3368 * even if the input is denormal; however this is harmless because
3369 * the largest possible single-precision denormal is still smaller
3370 * than the smallest representable half-precision denormal, and so we
3371 * will end up ignoring aSig and returning via the "always return zero"
3372 * codepath.
3373 */
Paul Brook60011492009-11-19 16:45:20 +00003374 aSig |= 0x00800000;
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003375 aExp -= 0x71;
Peter Maydell38970ef2014-01-06 11:47:21 +00003376
Peter Maydellc4a1c5e2014-01-07 17:19:11 +00003377 return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR);
Paul Brook60011492009-11-19 16:45:20 +00003378}
3379
Peter Maydell14c9a072014-01-07 17:19:12 +00003380float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM)
3381{
3382 flag aSign;
3383 int_fast16_t aExp;
3384 uint32_t aSig;
3385
3386 aSign = extractFloat16Sign(a);
3387 aExp = extractFloat16Exp(a);
3388 aSig = extractFloat16Frac(a);
3389
3390 if (aExp == 0x1f && ieee) {
3391 if (aSig) {
3392 return commonNaNToFloat64(
3393 float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
3394 }
3395 return packFloat64(aSign, 0x7ff, 0);
3396 }
3397 if (aExp == 0) {
3398 if (aSig == 0) {
3399 return packFloat64(aSign, 0, 0);
3400 }
3401
3402 normalizeFloat16Subnormal(aSig, &aExp, &aSig);
3403 aExp--;
3404 }
3405 return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42);
3406}
3407
3408float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM)
3409{
3410 flag aSign;
3411 int_fast16_t aExp;
3412 uint64_t aSig;
3413 uint32_t zSig;
3414
3415 a = float64_squash_input_denormal(a STATUS_VAR);
3416
3417 aSig = extractFloat64Frac(a);
3418 aExp = extractFloat64Exp(a);
3419 aSign = extractFloat64Sign(a);
3420 if (aExp == 0x7FF) {
3421 if (aSig) {
3422 /* Input is a NaN */
3423 if (!ieee) {
3424 float_raise(float_flag_invalid STATUS_VAR);
3425 return packFloat16(aSign, 0, 0);
3426 }
3427 return commonNaNToFloat16(
3428 float64ToCommonNaN(a STATUS_VAR) STATUS_VAR);
3429 }
3430 /* Infinity */
3431 if (!ieee) {
3432 float_raise(float_flag_invalid STATUS_VAR);
3433 return packFloat16(aSign, 0x1f, 0x3ff);
3434 }
3435 return packFloat16(aSign, 0x1f, 0);
3436 }
3437 shift64RightJamming(aSig, 29, &aSig);
3438 zSig = aSig;
3439 if (aExp == 0 && zSig == 0) {
3440 return packFloat16(aSign, 0, 0);
3441 }
3442 /* Decimal point between bits 22 and 23. Note that we add the 1 bit
3443 * even if the input is denormal; however this is harmless because
3444 * the largest possible single-precision denormal is still smaller
3445 * than the smallest representable half-precision denormal, and so we
3446 * will end up ignoring aSig and returning via the "always return zero"
3447 * codepath.
3448 */
3449 zSig |= 0x00800000;
3450 aExp -= 0x3F1;
3451
3452 return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR);
3453}
3454
bellard158142c2005-03-13 16:54:06 +00003455/*----------------------------------------------------------------------------
3456| Returns the result of converting the double-precision floating-point value
3457| `a' to the extended double-precision floating-point format. The conversion
3458| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3459| Arithmetic.
3460*----------------------------------------------------------------------------*/
3461
3462floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
3463{
3464 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003465 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003466 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00003467
Peter Maydell37d18662011-01-06 19:37:53 +00003468 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003469 aSig = extractFloat64Frac( a );
3470 aExp = extractFloat64Exp( a );
3471 aSign = extractFloat64Sign( a );
3472 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003473 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003474 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3475 }
3476 if ( aExp == 0 ) {
3477 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3478 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3479 }
3480 return
3481 packFloatx80(
3482 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3483
3484}
3485
bellard158142c2005-03-13 16:54:06 +00003486/*----------------------------------------------------------------------------
3487| Returns the result of converting the double-precision floating-point value
3488| `a' to the quadruple-precision floating-point format. The conversion is
3489| performed according to the IEC/IEEE Standard for Binary Floating-Point
3490| Arithmetic.
3491*----------------------------------------------------------------------------*/
3492
3493float128 float64_to_float128( float64 a STATUS_PARAM )
3494{
3495 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003496 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003497 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003498
Peter Maydell37d18662011-01-06 19:37:53 +00003499 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003500 aSig = extractFloat64Frac( a );
3501 aExp = extractFloat64Exp( a );
3502 aSign = extractFloat64Sign( a );
3503 if ( aExp == 0x7FF ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00003504 if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00003505 return packFloat128( aSign, 0x7FFF, 0, 0 );
3506 }
3507 if ( aExp == 0 ) {
3508 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3509 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3510 --aExp;
3511 }
3512 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3513 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3514
3515}
3516
bellard158142c2005-03-13 16:54:06 +00003517/*----------------------------------------------------------------------------
3518| Rounds the double-precision floating-point value `a' to an integer, and
3519| returns the result as a double-precision floating-point value. The
3520| operation is performed according to the IEC/IEEE Standard for Binary
3521| Floating-Point Arithmetic.
3522*----------------------------------------------------------------------------*/
3523
3524float64 float64_round_to_int( float64 a STATUS_PARAM )
3525{
3526 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003527 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003528 uint64_t lastBitMask, roundBitsMask;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003529 uint64_t z;
Peter Maydell37d18662011-01-06 19:37:53 +00003530 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003531
3532 aExp = extractFloat64Exp( a );
3533 if ( 0x433 <= aExp ) {
3534 if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
3535 return propagateFloat64NaN( a, a STATUS_VAR );
3536 }
3537 return a;
3538 }
3539 if ( aExp < 0x3FF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01003540 if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00003541 STATUS(float_exception_flags) |= float_flag_inexact;
3542 aSign = extractFloat64Sign( a );
3543 switch ( STATUS(float_rounding_mode) ) {
3544 case float_round_nearest_even:
3545 if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
3546 return packFloat64( aSign, 0x3FF, 0 );
3547 }
3548 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00003549 case float_round_ties_away:
3550 if (aExp == 0x3FE) {
3551 return packFloat64(aSign, 0x3ff, 0);
3552 }
3553 break;
bellard158142c2005-03-13 16:54:06 +00003554 case float_round_down:
pbrookf090c9d2007-11-18 14:33:24 +00003555 return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
bellard158142c2005-03-13 16:54:06 +00003556 case float_round_up:
pbrookf090c9d2007-11-18 14:33:24 +00003557 return make_float64(
3558 aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
bellard158142c2005-03-13 16:54:06 +00003559 }
3560 return packFloat64( aSign, 0, 0 );
3561 }
3562 lastBitMask = 1;
3563 lastBitMask <<= 0x433 - aExp;
3564 roundBitsMask = lastBitMask - 1;
pbrookf090c9d2007-11-18 14:33:24 +00003565 z = float64_val(a);
Peter Maydelldc355b72014-01-07 17:19:12 +00003566 switch (STATUS(float_rounding_mode)) {
3567 case float_round_nearest_even:
3568 z += lastBitMask >> 1;
3569 if ((z & roundBitsMask) == 0) {
3570 z &= ~lastBitMask;
3571 }
3572 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00003573 case float_round_ties_away:
3574 z += lastBitMask >> 1;
3575 break;
Peter Maydelldc355b72014-01-07 17:19:12 +00003576 case float_round_to_zero:
3577 break;
3578 case float_round_up:
3579 if (!extractFloat64Sign(make_float64(z))) {
bellard158142c2005-03-13 16:54:06 +00003580 z += roundBitsMask;
3581 }
Peter Maydelldc355b72014-01-07 17:19:12 +00003582 break;
3583 case float_round_down:
3584 if (extractFloat64Sign(make_float64(z))) {
3585 z += roundBitsMask;
3586 }
3587 break;
3588 default:
3589 abort();
bellard158142c2005-03-13 16:54:06 +00003590 }
3591 z &= ~ roundBitsMask;
pbrookf090c9d2007-11-18 14:33:24 +00003592 if ( z != float64_val(a) )
3593 STATUS(float_exception_flags) |= float_flag_inexact;
3594 return make_float64(z);
bellard158142c2005-03-13 16:54:06 +00003595
3596}
3597
pbrooke6e59062006-10-22 00:18:54 +00003598float64 float64_trunc_to_int( float64 a STATUS_PARAM)
3599{
3600 int oldmode;
3601 float64 res;
3602 oldmode = STATUS(float_rounding_mode);
3603 STATUS(float_rounding_mode) = float_round_to_zero;
3604 res = float64_round_to_int(a STATUS_VAR);
3605 STATUS(float_rounding_mode) = oldmode;
3606 return res;
3607}
3608
bellard158142c2005-03-13 16:54:06 +00003609/*----------------------------------------------------------------------------
3610| Returns the result of adding the absolute values of the double-precision
3611| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
3612| before being returned. `zSign' is ignored if the result is a NaN.
3613| The addition is performed according to the IEC/IEEE Standard for Binary
3614| Floating-Point Arithmetic.
3615*----------------------------------------------------------------------------*/
3616
3617static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3618{
Andreas Färber94a49d82012-04-26 00:15:56 +02003619 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003620 uint64_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02003621 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00003622
3623 aSig = extractFloat64Frac( a );
3624 aExp = extractFloat64Exp( a );
3625 bSig = extractFloat64Frac( b );
3626 bExp = extractFloat64Exp( b );
3627 expDiff = aExp - bExp;
3628 aSig <<= 9;
3629 bSig <<= 9;
3630 if ( 0 < expDiff ) {
3631 if ( aExp == 0x7FF ) {
3632 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3633 return a;
3634 }
3635 if ( bExp == 0 ) {
3636 --expDiff;
3637 }
3638 else {
3639 bSig |= LIT64( 0x2000000000000000 );
3640 }
3641 shift64RightJamming( bSig, expDiff, &bSig );
3642 zExp = aExp;
3643 }
3644 else if ( expDiff < 0 ) {
3645 if ( bExp == 0x7FF ) {
3646 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3647 return packFloat64( zSign, 0x7FF, 0 );
3648 }
3649 if ( aExp == 0 ) {
3650 ++expDiff;
3651 }
3652 else {
3653 aSig |= LIT64( 0x2000000000000000 );
3654 }
3655 shift64RightJamming( aSig, - expDiff, &aSig );
3656 zExp = bExp;
3657 }
3658 else {
3659 if ( aExp == 0x7FF ) {
3660 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3661 return a;
3662 }
pbrookfe76d972008-12-19 14:33:59 +00003663 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01003664 if (STATUS(flush_to_zero)) {
3665 if (aSig | bSig) {
3666 float_raise(float_flag_output_denormal STATUS_VAR);
3667 }
3668 return packFloat64(zSign, 0, 0);
3669 }
pbrookfe76d972008-12-19 14:33:59 +00003670 return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
3671 }
bellard158142c2005-03-13 16:54:06 +00003672 zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
3673 zExp = aExp;
3674 goto roundAndPack;
3675 }
3676 aSig |= LIT64( 0x2000000000000000 );
3677 zSig = ( aSig + bSig )<<1;
3678 --zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003679 if ( (int64_t) zSig < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003680 zSig = aSig + bSig;
3681 ++zExp;
3682 }
3683 roundAndPack:
3684 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3685
3686}
3687
3688/*----------------------------------------------------------------------------
3689| Returns the result of subtracting the absolute values of the double-
3690| precision floating-point values `a' and `b'. If `zSign' is 1, the
3691| difference is negated before being returned. `zSign' is ignored if the
3692| result is a NaN. The subtraction is performed according to the IEC/IEEE
3693| Standard for Binary Floating-Point Arithmetic.
3694*----------------------------------------------------------------------------*/
3695
3696static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
3697{
Andreas Färber94a49d82012-04-26 00:15:56 +02003698 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003699 uint64_t aSig, bSig, zSig;
Andreas Färber94a49d82012-04-26 00:15:56 +02003700 int_fast16_t expDiff;
bellard158142c2005-03-13 16:54:06 +00003701
3702 aSig = extractFloat64Frac( a );
3703 aExp = extractFloat64Exp( a );
3704 bSig = extractFloat64Frac( b );
3705 bExp = extractFloat64Exp( b );
3706 expDiff = aExp - bExp;
3707 aSig <<= 10;
3708 bSig <<= 10;
3709 if ( 0 < expDiff ) goto aExpBigger;
3710 if ( expDiff < 0 ) goto bExpBigger;
3711 if ( aExp == 0x7FF ) {
3712 if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3713 float_raise( float_flag_invalid STATUS_VAR);
3714 return float64_default_nan;
3715 }
3716 if ( aExp == 0 ) {
3717 aExp = 1;
3718 bExp = 1;
3719 }
3720 if ( bSig < aSig ) goto aBigger;
3721 if ( aSig < bSig ) goto bBigger;
3722 return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
3723 bExpBigger:
3724 if ( bExp == 0x7FF ) {
3725 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3726 return packFloat64( zSign ^ 1, 0x7FF, 0 );
3727 }
3728 if ( aExp == 0 ) {
3729 ++expDiff;
3730 }
3731 else {
3732 aSig |= LIT64( 0x4000000000000000 );
3733 }
3734 shift64RightJamming( aSig, - expDiff, &aSig );
3735 bSig |= LIT64( 0x4000000000000000 );
3736 bBigger:
3737 zSig = bSig - aSig;
3738 zExp = bExp;
3739 zSign ^= 1;
3740 goto normalizeRoundAndPack;
3741 aExpBigger:
3742 if ( aExp == 0x7FF ) {
3743 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3744 return a;
3745 }
3746 if ( bExp == 0 ) {
3747 --expDiff;
3748 }
3749 else {
3750 bSig |= LIT64( 0x4000000000000000 );
3751 }
3752 shift64RightJamming( bSig, expDiff, &bSig );
3753 aSig |= LIT64( 0x4000000000000000 );
3754 aBigger:
3755 zSig = aSig - bSig;
3756 zExp = aExp;
3757 normalizeRoundAndPack:
3758 --zExp;
3759 return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3760
3761}
3762
3763/*----------------------------------------------------------------------------
3764| Returns the result of adding the double-precision floating-point values `a'
3765| and `b'. The operation is performed according to the IEC/IEEE Standard for
3766| Binary Floating-Point Arithmetic.
3767*----------------------------------------------------------------------------*/
3768
3769float64 float64_add( float64 a, float64 b STATUS_PARAM )
3770{
3771 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003772 a = float64_squash_input_denormal(a STATUS_VAR);
3773 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003774
3775 aSign = extractFloat64Sign( a );
3776 bSign = extractFloat64Sign( b );
3777 if ( aSign == bSign ) {
3778 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3779 }
3780 else {
3781 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3782 }
3783
3784}
3785
3786/*----------------------------------------------------------------------------
3787| Returns the result of subtracting the double-precision floating-point values
3788| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3789| for Binary Floating-Point Arithmetic.
3790*----------------------------------------------------------------------------*/
3791
3792float64 float64_sub( float64 a, float64 b STATUS_PARAM )
3793{
3794 flag aSign, bSign;
Peter Maydell37d18662011-01-06 19:37:53 +00003795 a = float64_squash_input_denormal(a STATUS_VAR);
3796 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003797
3798 aSign = extractFloat64Sign( a );
3799 bSign = extractFloat64Sign( b );
3800 if ( aSign == bSign ) {
3801 return subFloat64Sigs( a, b, aSign STATUS_VAR );
3802 }
3803 else {
3804 return addFloat64Sigs( a, b, aSign STATUS_VAR );
3805 }
3806
3807}
3808
3809/*----------------------------------------------------------------------------
3810| Returns the result of multiplying the double-precision floating-point values
3811| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
3812| for Binary Floating-Point Arithmetic.
3813*----------------------------------------------------------------------------*/
3814
3815float64 float64_mul( float64 a, float64 b STATUS_PARAM )
3816{
3817 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003818 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003819 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00003820
Peter Maydell37d18662011-01-06 19:37:53 +00003821 a = float64_squash_input_denormal(a STATUS_VAR);
3822 b = float64_squash_input_denormal(b STATUS_VAR);
3823
bellard158142c2005-03-13 16:54:06 +00003824 aSig = extractFloat64Frac( a );
3825 aExp = extractFloat64Exp( a );
3826 aSign = extractFloat64Sign( a );
3827 bSig = extractFloat64Frac( b );
3828 bExp = extractFloat64Exp( b );
3829 bSign = extractFloat64Sign( b );
3830 zSign = aSign ^ bSign;
3831 if ( aExp == 0x7FF ) {
3832 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3833 return propagateFloat64NaN( a, b STATUS_VAR );
3834 }
3835 if ( ( bExp | bSig ) == 0 ) {
3836 float_raise( float_flag_invalid STATUS_VAR);
3837 return float64_default_nan;
3838 }
3839 return packFloat64( zSign, 0x7FF, 0 );
3840 }
3841 if ( bExp == 0x7FF ) {
3842 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3843 if ( ( aExp | aSig ) == 0 ) {
3844 float_raise( float_flag_invalid STATUS_VAR);
3845 return float64_default_nan;
3846 }
3847 return packFloat64( zSign, 0x7FF, 0 );
3848 }
3849 if ( aExp == 0 ) {
3850 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3851 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3852 }
3853 if ( bExp == 0 ) {
3854 if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
3855 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3856 }
3857 zExp = aExp + bExp - 0x3FF;
3858 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3859 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3860 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3861 zSig0 |= ( zSig1 != 0 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003862 if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00003863 zSig0 <<= 1;
3864 --zExp;
3865 }
3866 return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
3867
3868}
3869
3870/*----------------------------------------------------------------------------
3871| Returns the result of dividing the double-precision floating-point value `a'
3872| by the corresponding value `b'. The operation is performed according to
3873| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3874*----------------------------------------------------------------------------*/
3875
3876float64 float64_div( float64 a, float64 b STATUS_PARAM )
3877{
3878 flag aSign, bSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003879 int_fast16_t aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003880 uint64_t aSig, bSig, zSig;
3881 uint64_t rem0, rem1;
3882 uint64_t term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00003883 a = float64_squash_input_denormal(a STATUS_VAR);
3884 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003885
3886 aSig = extractFloat64Frac( a );
3887 aExp = extractFloat64Exp( a );
3888 aSign = extractFloat64Sign( a );
3889 bSig = extractFloat64Frac( b );
3890 bExp = extractFloat64Exp( b );
3891 bSign = extractFloat64Sign( b );
3892 zSign = aSign ^ bSign;
3893 if ( aExp == 0x7FF ) {
3894 if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3895 if ( bExp == 0x7FF ) {
3896 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3897 float_raise( float_flag_invalid STATUS_VAR);
3898 return float64_default_nan;
3899 }
3900 return packFloat64( zSign, 0x7FF, 0 );
3901 }
3902 if ( bExp == 0x7FF ) {
3903 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3904 return packFloat64( zSign, 0, 0 );
3905 }
3906 if ( bExp == 0 ) {
3907 if ( bSig == 0 ) {
3908 if ( ( aExp | aSig ) == 0 ) {
3909 float_raise( float_flag_invalid STATUS_VAR);
3910 return float64_default_nan;
3911 }
3912 float_raise( float_flag_divbyzero STATUS_VAR);
3913 return packFloat64( zSign, 0x7FF, 0 );
3914 }
3915 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3916 }
3917 if ( aExp == 0 ) {
3918 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3919 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3920 }
3921 zExp = aExp - bExp + 0x3FD;
3922 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3923 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3924 if ( bSig <= ( aSig + aSig ) ) {
3925 aSig >>= 1;
3926 ++zExp;
3927 }
3928 zSig = estimateDiv128To64( aSig, 0, bSig );
3929 if ( ( zSig & 0x1FF ) <= 2 ) {
3930 mul64To128( bSig, zSig, &term0, &term1 );
3931 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01003932 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00003933 --zSig;
3934 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3935 }
3936 zSig |= ( rem1 != 0 );
3937 }
3938 return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
3939
3940}
3941
3942/*----------------------------------------------------------------------------
3943| Returns the remainder of the double-precision floating-point value `a'
3944| with respect to the corresponding value `b'. The operation is performed
3945| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3946*----------------------------------------------------------------------------*/
3947
3948float64 float64_rem( float64 a, float64 b STATUS_PARAM )
3949{
Blue Swirled086f32010-03-07 13:49:58 +00003950 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02003951 int_fast16_t aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01003952 uint64_t aSig, bSig;
3953 uint64_t q, alternateASig;
3954 int64_t sigMean;
bellard158142c2005-03-13 16:54:06 +00003955
Peter Maydell37d18662011-01-06 19:37:53 +00003956 a = float64_squash_input_denormal(a STATUS_VAR);
3957 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00003958 aSig = extractFloat64Frac( a );
3959 aExp = extractFloat64Exp( a );
3960 aSign = extractFloat64Sign( a );
3961 bSig = extractFloat64Frac( b );
3962 bExp = extractFloat64Exp( b );
bellard158142c2005-03-13 16:54:06 +00003963 if ( aExp == 0x7FF ) {
3964 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3965 return propagateFloat64NaN( a, b STATUS_VAR );
3966 }
3967 float_raise( float_flag_invalid STATUS_VAR);
3968 return float64_default_nan;
3969 }
3970 if ( bExp == 0x7FF ) {
3971 if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
3972 return a;
3973 }
3974 if ( bExp == 0 ) {
3975 if ( bSig == 0 ) {
3976 float_raise( float_flag_invalid STATUS_VAR);
3977 return float64_default_nan;
3978 }
3979 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3980 }
3981 if ( aExp == 0 ) {
3982 if ( aSig == 0 ) return a;
3983 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3984 }
3985 expDiff = aExp - bExp;
3986 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3987 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3988 if ( expDiff < 0 ) {
3989 if ( expDiff < -1 ) return a;
3990 aSig >>= 1;
3991 }
3992 q = ( bSig <= aSig );
3993 if ( q ) aSig -= bSig;
3994 expDiff -= 64;
3995 while ( 0 < expDiff ) {
3996 q = estimateDiv128To64( aSig, 0, bSig );
3997 q = ( 2 < q ) ? q - 2 : 0;
3998 aSig = - ( ( bSig>>2 ) * q );
3999 expDiff -= 62;
4000 }
4001 expDiff += 64;
4002 if ( 0 < expDiff ) {
4003 q = estimateDiv128To64( aSig, 0, bSig );
4004 q = ( 2 < q ) ? q - 2 : 0;
4005 q >>= 64 - expDiff;
4006 bSig >>= 2;
4007 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
4008 }
4009 else {
4010 aSig >>= 2;
4011 bSig >>= 2;
4012 }
4013 do {
4014 alternateASig = aSig;
4015 ++q;
4016 aSig -= bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004017 } while ( 0 <= (int64_t) aSig );
bellard158142c2005-03-13 16:54:06 +00004018 sigMean = aSig + alternateASig;
4019 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
4020 aSig = alternateASig;
4021 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004022 zSign = ( (int64_t) aSig < 0 );
bellard158142c2005-03-13 16:54:06 +00004023 if ( zSign ) aSig = - aSig;
4024 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
4025
4026}
4027
4028/*----------------------------------------------------------------------------
Peter Maydell369be8f2011-10-19 16:14:06 +00004029| Returns the result of multiplying the double-precision floating-point values
4030| `a' and `b' then adding 'c', with no intermediate rounding step after the
4031| multiplication. The operation is performed according to the IEC/IEEE
4032| Standard for Binary Floating-Point Arithmetic 754-2008.
4033| The flags argument allows the caller to select negation of the
4034| addend, the intermediate product, or the final result. (The difference
4035| between this and having the caller do a separate negation is that negating
4036| externally will flip the sign bit on NaNs.)
4037*----------------------------------------------------------------------------*/
4038
4039float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
4040{
4041 flag aSign, bSign, cSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02004042 int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
Peter Maydell369be8f2011-10-19 16:14:06 +00004043 uint64_t aSig, bSig, cSig;
4044 flag pInf, pZero, pSign;
4045 uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
4046 int shiftcount;
4047 flag signflip, infzero;
4048
4049 a = float64_squash_input_denormal(a STATUS_VAR);
4050 b = float64_squash_input_denormal(b STATUS_VAR);
4051 c = float64_squash_input_denormal(c STATUS_VAR);
4052 aSig = extractFloat64Frac(a);
4053 aExp = extractFloat64Exp(a);
4054 aSign = extractFloat64Sign(a);
4055 bSig = extractFloat64Frac(b);
4056 bExp = extractFloat64Exp(b);
4057 bSign = extractFloat64Sign(b);
4058 cSig = extractFloat64Frac(c);
4059 cExp = extractFloat64Exp(c);
4060 cSign = extractFloat64Sign(c);
4061
4062 infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
4063 (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
4064
4065 /* It is implementation-defined whether the cases of (0,inf,qnan)
4066 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
4067 * they return if they do), so we have to hand this information
4068 * off to the target-specific pick-a-NaN routine.
4069 */
4070 if (((aExp == 0x7ff) && aSig) ||
4071 ((bExp == 0x7ff) && bSig) ||
4072 ((cExp == 0x7ff) && cSig)) {
4073 return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
4074 }
4075
4076 if (infzero) {
4077 float_raise(float_flag_invalid STATUS_VAR);
4078 return float64_default_nan;
4079 }
4080
4081 if (flags & float_muladd_negate_c) {
4082 cSign ^= 1;
4083 }
4084
4085 signflip = (flags & float_muladd_negate_result) ? 1 : 0;
4086
4087 /* Work out the sign and type of the product */
4088 pSign = aSign ^ bSign;
4089 if (flags & float_muladd_negate_product) {
4090 pSign ^= 1;
4091 }
4092 pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
4093 pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
4094
4095 if (cExp == 0x7ff) {
4096 if (pInf && (pSign ^ cSign)) {
4097 /* addition of opposite-signed infinities => InvalidOperation */
4098 float_raise(float_flag_invalid STATUS_VAR);
4099 return float64_default_nan;
4100 }
4101 /* Otherwise generate an infinity of the same sign */
4102 return packFloat64(cSign ^ signflip, 0x7ff, 0);
4103 }
4104
4105 if (pInf) {
4106 return packFloat64(pSign ^ signflip, 0x7ff, 0);
4107 }
4108
4109 if (pZero) {
4110 if (cExp == 0) {
4111 if (cSig == 0) {
4112 /* Adding two exact zeroes */
4113 if (pSign == cSign) {
4114 zSign = pSign;
4115 } else if (STATUS(float_rounding_mode) == float_round_down) {
4116 zSign = 1;
4117 } else {
4118 zSign = 0;
4119 }
4120 return packFloat64(zSign ^ signflip, 0, 0);
4121 }
4122 /* Exact zero plus a denorm */
4123 if (STATUS(flush_to_zero)) {
4124 float_raise(float_flag_output_denormal STATUS_VAR);
4125 return packFloat64(cSign ^ signflip, 0, 0);
4126 }
4127 }
4128 /* Zero plus something non-zero : just return the something */
Peter Maydell67d43532014-02-20 10:35:50 +00004129 if (flags & float_muladd_halve_result) {
4130 if (cExp == 0) {
4131 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4132 }
4133 /* Subtract one to halve, and one again because roundAndPackFloat64
4134 * wants one less than the true exponent.
4135 */
4136 cExp -= 2;
4137 cSig = (cSig | 0x0010000000000000ULL) << 10;
4138 return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR);
4139 }
Richard Sandiforda6e7c182013-01-22 17:03:05 +00004140 return packFloat64(cSign ^ signflip, cExp, cSig);
Peter Maydell369be8f2011-10-19 16:14:06 +00004141 }
4142
4143 if (aExp == 0) {
4144 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
4145 }
4146 if (bExp == 0) {
4147 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
4148 }
4149
4150 /* Calculate the actual result a * b + c */
4151
4152 /* Multiply first; this is easy. */
4153 /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
4154 * because we want the true exponent, not the "one-less-than"
4155 * flavour that roundAndPackFloat64() takes.
4156 */
4157 pExp = aExp + bExp - 0x3fe;
4158 aSig = (aSig | LIT64(0x0010000000000000))<<10;
4159 bSig = (bSig | LIT64(0x0010000000000000))<<11;
4160 mul64To128(aSig, bSig, &pSig0, &pSig1);
4161 if ((int64_t)(pSig0 << 1) >= 0) {
4162 shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
4163 pExp--;
4164 }
4165
4166 zSign = pSign ^ signflip;
4167
4168 /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
4169 * bit in position 126.
4170 */
4171 if (cExp == 0) {
4172 if (!cSig) {
4173 /* Throw out the special case of c being an exact zero now */
4174 shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
Peter Maydell67d43532014-02-20 10:35:50 +00004175 if (flags & float_muladd_halve_result) {
4176 pExp--;
4177 }
Peter Maydell369be8f2011-10-19 16:14:06 +00004178 return roundAndPackFloat64(zSign, pExp - 1,
4179 pSig1 STATUS_VAR);
4180 }
4181 normalizeFloat64Subnormal(cSig, &cExp, &cSig);
4182 }
4183
4184 /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
4185 * significand of the addend, with the explicit bit in position 126.
4186 */
4187 cSig0 = cSig << (126 - 64 - 52);
4188 cSig1 = 0;
4189 cSig0 |= LIT64(0x4000000000000000);
4190 expDiff = pExp - cExp;
4191
4192 if (pSign == cSign) {
4193 /* Addition */
4194 if (expDiff > 0) {
4195 /* scale c to match p */
4196 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4197 zExp = pExp;
4198 } else if (expDiff < 0) {
4199 /* scale p to match c */
4200 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4201 zExp = cExp;
4202 } else {
4203 /* no scaling needed */
4204 zExp = cExp;
4205 }
4206 /* Add significands and make sure explicit bit ends up in posn 126 */
4207 add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4208 if ((int64_t)zSig0 < 0) {
4209 shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
4210 } else {
4211 zExp--;
4212 }
4213 shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
Peter Maydell67d43532014-02-20 10:35:50 +00004214 if (flags & float_muladd_halve_result) {
4215 zExp--;
4216 }
Peter Maydell369be8f2011-10-19 16:14:06 +00004217 return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
4218 } else {
4219 /* Subtraction */
4220 if (expDiff > 0) {
4221 shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
4222 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4223 zExp = pExp;
4224 } else if (expDiff < 0) {
4225 shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
4226 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4227 zExp = cExp;
4228 zSign ^= 1;
4229 } else {
4230 zExp = pExp;
4231 if (lt128(cSig0, cSig1, pSig0, pSig1)) {
4232 sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
4233 } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
4234 sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
4235 zSign ^= 1;
4236 } else {
4237 /* Exact zero */
4238 zSign = signflip;
4239 if (STATUS(float_rounding_mode) == float_round_down) {
4240 zSign ^= 1;
4241 }
4242 return packFloat64(zSign, 0, 0);
4243 }
4244 }
4245 --zExp;
4246 /* Do the equivalent of normalizeRoundAndPackFloat64() but
4247 * starting with the significand in a pair of uint64_t.
4248 */
4249 if (zSig0) {
4250 shiftcount = countLeadingZeros64(zSig0) - 1;
4251 shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
4252 if (zSig1) {
4253 zSig0 |= 1;
4254 }
4255 zExp -= shiftcount;
4256 } else {
Peter Maydelle3d142d2013-04-12 16:37:52 +01004257 shiftcount = countLeadingZeros64(zSig1);
4258 if (shiftcount == 0) {
4259 zSig0 = (zSig1 >> 1) | (zSig1 & 1);
4260 zExp -= 63;
4261 } else {
4262 shiftcount--;
4263 zSig0 = zSig1 << shiftcount;
4264 zExp -= (shiftcount + 64);
4265 }
Peter Maydell369be8f2011-10-19 16:14:06 +00004266 }
Peter Maydell67d43532014-02-20 10:35:50 +00004267 if (flags & float_muladd_halve_result) {
4268 zExp--;
4269 }
Peter Maydell369be8f2011-10-19 16:14:06 +00004270 return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
4271 }
4272}
4273
4274/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004275| Returns the square root of the double-precision floating-point value `a'.
4276| The operation is performed according to the IEC/IEEE Standard for Binary
4277| Floating-Point Arithmetic.
4278*----------------------------------------------------------------------------*/
4279
4280float64 float64_sqrt( float64 a STATUS_PARAM )
4281{
4282 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02004283 int_fast16_t aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004284 uint64_t aSig, zSig, doubleZSig;
4285 uint64_t rem0, rem1, term0, term1;
Peter Maydell37d18662011-01-06 19:37:53 +00004286 a = float64_squash_input_denormal(a STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004287
4288 aSig = extractFloat64Frac( a );
4289 aExp = extractFloat64Exp( a );
4290 aSign = extractFloat64Sign( a );
4291 if ( aExp == 0x7FF ) {
4292 if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
4293 if ( ! aSign ) return a;
4294 float_raise( float_flag_invalid STATUS_VAR);
4295 return float64_default_nan;
4296 }
4297 if ( aSign ) {
4298 if ( ( aExp | aSig ) == 0 ) return a;
4299 float_raise( float_flag_invalid STATUS_VAR);
4300 return float64_default_nan;
4301 }
4302 if ( aExp == 0 ) {
pbrookf090c9d2007-11-18 14:33:24 +00004303 if ( aSig == 0 ) return float64_zero;
bellard158142c2005-03-13 16:54:06 +00004304 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4305 }
4306 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
4307 aSig |= LIT64( 0x0010000000000000 );
4308 zSig = estimateSqrt32( aExp, aSig>>21 );
4309 aSig <<= 9 - ( aExp & 1 );
4310 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
4311 if ( ( zSig & 0x1FF ) <= 5 ) {
4312 doubleZSig = zSig<<1;
4313 mul64To128( zSig, zSig, &term0, &term1 );
4314 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004315 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00004316 --zSig;
4317 doubleZSig -= 2;
4318 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
4319 }
4320 zSig |= ( ( rem0 | rem1 ) != 0 );
4321 }
4322 return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
4323
4324}
4325
4326/*----------------------------------------------------------------------------
aurel32374dfc32009-02-05 13:42:47 +00004327| Returns the binary log of the double-precision floating-point value `a'.
4328| The operation is performed according to the IEC/IEEE Standard for Binary
4329| Floating-Point Arithmetic.
4330*----------------------------------------------------------------------------*/
4331float64 float64_log2( float64 a STATUS_PARAM )
4332{
4333 flag aSign, zSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02004334 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004335 uint64_t aSig, aSig0, aSig1, zSig, i;
Peter Maydell37d18662011-01-06 19:37:53 +00004336 a = float64_squash_input_denormal(a STATUS_VAR);
aurel32374dfc32009-02-05 13:42:47 +00004337
4338 aSig = extractFloat64Frac( a );
4339 aExp = extractFloat64Exp( a );
4340 aSign = extractFloat64Sign( a );
4341
4342 if ( aExp == 0 ) {
4343 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4344 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4345 }
4346 if ( aSign ) {
4347 float_raise( float_flag_invalid STATUS_VAR);
4348 return float64_default_nan;
4349 }
4350 if ( aExp == 0x7FF ) {
4351 if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
4352 return a;
4353 }
4354
4355 aExp -= 0x3FF;
4356 aSig |= LIT64( 0x0010000000000000 );
4357 zSign = aExp < 0;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004358 zSig = (uint64_t)aExp << 52;
aurel32374dfc32009-02-05 13:42:47 +00004359 for (i = 1LL << 51; i > 0; i >>= 1) {
4360 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4361 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4362 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4363 aSig >>= 1;
4364 zSig |= i;
4365 }
4366 }
4367
4368 if ( zSign )
4369 zSig = -zSig;
4370 return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
4371}
4372
4373/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004374| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnob6893622011-04-14 00:49:29 +02004375| corresponding value `b', and 0 otherwise. The invalid exception is raised
4376| if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00004377| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4378*----------------------------------------------------------------------------*/
4379
Aurelien Jarnob6893622011-04-14 00:49:29 +02004380int float64_eq( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004381{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004382 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004383 a = float64_squash_input_denormal(a STATUS_VAR);
4384 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004385
4386 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4387 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4388 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004389 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004390 return 0;
4391 }
pbrookf090c9d2007-11-18 14:33:24 +00004392 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004393 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004394 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004395
4396}
4397
4398/*----------------------------------------------------------------------------
4399| Returns 1 if the double-precision floating-point value `a' is less than or
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004400| equal to the corresponding value `b', and 0 otherwise. The invalid
4401| exception is raised if either operand is a NaN. The comparison is performed
4402| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004403*----------------------------------------------------------------------------*/
4404
bellard750afe92006-10-28 19:27:11 +00004405int float64_le( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004406{
4407 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004408 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004409 a = float64_squash_input_denormal(a STATUS_VAR);
4410 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004411
4412 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4413 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4414 ) {
4415 float_raise( float_flag_invalid STATUS_VAR);
4416 return 0;
4417 }
4418 aSign = extractFloat64Sign( a );
4419 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004420 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004421 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004422 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004423 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004424
4425}
4426
4427/*----------------------------------------------------------------------------
4428| Returns 1 if the double-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004429| the corresponding value `b', and 0 otherwise. The invalid exception is
4430| raised if either operand is a NaN. The comparison is performed according
4431| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004432*----------------------------------------------------------------------------*/
4433
bellard750afe92006-10-28 19:27:11 +00004434int float64_lt( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004435{
4436 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004437 uint64_t av, bv;
bellard158142c2005-03-13 16:54:06 +00004438
Peter Maydell37d18662011-01-06 19:37:53 +00004439 a = float64_squash_input_denormal(a STATUS_VAR);
4440 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004441 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4442 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4443 ) {
4444 float_raise( float_flag_invalid STATUS_VAR);
4445 return 0;
4446 }
4447 aSign = extractFloat64Sign( a );
4448 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004449 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004450 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004451 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004452 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004453
4454}
4455
4456/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02004457| Returns 1 if the double-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004458| be compared, and 0 otherwise. The invalid exception is raised if either
4459| operand is a NaN. The comparison is performed according to the IEC/IEEE
4460| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02004461*----------------------------------------------------------------------------*/
4462
4463int float64_unordered( float64 a, float64 b STATUS_PARAM )
4464{
4465 a = float64_squash_input_denormal(a STATUS_VAR);
4466 b = float64_squash_input_denormal(b STATUS_VAR);
4467
4468 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4469 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4470 ) {
4471 float_raise( float_flag_invalid STATUS_VAR);
4472 return 1;
4473 }
4474 return 0;
4475}
4476
4477/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00004478| Returns 1 if the double-precision floating-point value `a' is equal to the
Aurelien Jarnof5a64252011-04-14 00:49:30 +02004479| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4480| exception.The comparison is performed according to the IEC/IEEE Standard
4481| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00004482*----------------------------------------------------------------------------*/
4483
Aurelien Jarnob6893622011-04-14 00:49:29 +02004484int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004485{
Andreas Färberbb98fe42011-03-07 01:34:06 +01004486 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004487 a = float64_squash_input_denormal(a STATUS_VAR);
4488 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004489
4490 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4491 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4492 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02004493 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4494 float_raise( float_flag_invalid STATUS_VAR);
4495 }
bellard158142c2005-03-13 16:54:06 +00004496 return 0;
4497 }
pbrookf090c9d2007-11-18 14:33:24 +00004498 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004499 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004500 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
bellard158142c2005-03-13 16:54:06 +00004501
4502}
4503
4504/*----------------------------------------------------------------------------
4505| Returns 1 if the double-precision floating-point value `a' is less than or
4506| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4507| cause an exception. Otherwise, the comparison is performed according to the
4508| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4509*----------------------------------------------------------------------------*/
4510
bellard750afe92006-10-28 19:27:11 +00004511int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004512{
4513 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004514 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004515 a = float64_squash_input_denormal(a STATUS_VAR);
4516 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004517
4518 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4519 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4520 ) {
4521 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4522 float_raise( float_flag_invalid STATUS_VAR);
4523 }
4524 return 0;
4525 }
4526 aSign = extractFloat64Sign( a );
4527 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004528 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004529 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004530 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004531 return ( av == bv ) || ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004532
4533}
4534
4535/*----------------------------------------------------------------------------
4536| Returns 1 if the double-precision floating-point value `a' is less than
4537| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4538| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4539| Standard for Binary Floating-Point Arithmetic.
4540*----------------------------------------------------------------------------*/
4541
bellard750afe92006-10-28 19:27:11 +00004542int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00004543{
4544 flag aSign, bSign;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004545 uint64_t av, bv;
Peter Maydell37d18662011-01-06 19:37:53 +00004546 a = float64_squash_input_denormal(a STATUS_VAR);
4547 b = float64_squash_input_denormal(b STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00004548
4549 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4550 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4551 ) {
4552 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4553 float_raise( float_flag_invalid STATUS_VAR);
4554 }
4555 return 0;
4556 }
4557 aSign = extractFloat64Sign( a );
4558 bSign = extractFloat64Sign( b );
pbrookf090c9d2007-11-18 14:33:24 +00004559 av = float64_val(a);
pbrooka1b91bb2007-11-21 15:32:12 +00004560 bv = float64_val(b);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004561 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
pbrookf090c9d2007-11-18 14:33:24 +00004562 return ( av != bv ) && ( aSign ^ ( av < bv ) );
bellard158142c2005-03-13 16:54:06 +00004563
4564}
4565
Aurelien Jarno67b78612011-04-14 00:49:29 +02004566/*----------------------------------------------------------------------------
4567| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4568| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4569| comparison is performed according to the IEC/IEEE Standard for Binary
4570| Floating-Point Arithmetic.
4571*----------------------------------------------------------------------------*/
4572
4573int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
4574{
4575 a = float64_squash_input_denormal(a STATUS_VAR);
4576 b = float64_squash_input_denormal(b STATUS_VAR);
4577
4578 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4579 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4580 ) {
4581 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
4582 float_raise( float_flag_invalid STATUS_VAR);
4583 }
4584 return 1;
4585 }
4586 return 0;
4587}
4588
bellard158142c2005-03-13 16:54:06 +00004589/*----------------------------------------------------------------------------
4590| Returns the result of converting the extended double-precision floating-
4591| point value `a' to the 32-bit two's complement integer format. The
4592| conversion is performed according to the IEC/IEEE Standard for Binary
4593| Floating-Point Arithmetic---which means in particular that the conversion
4594| is rounded according to the current rounding mode. If `a' is a NaN, the
4595| largest positive integer is returned. Otherwise, if the conversion
4596| overflows, the largest integer with the same sign as `a' is returned.
4597*----------------------------------------------------------------------------*/
4598
4599int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
4600{
4601 flag aSign;
4602 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004603 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004604
4605 aSig = extractFloatx80Frac( a );
4606 aExp = extractFloatx80Exp( a );
4607 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004608 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004609 shiftCount = 0x4037 - aExp;
4610 if ( shiftCount <= 0 ) shiftCount = 1;
4611 shift64RightJamming( aSig, shiftCount, &aSig );
4612 return roundAndPackInt32( aSign, aSig STATUS_VAR );
4613
4614}
4615
4616/*----------------------------------------------------------------------------
4617| Returns the result of converting the extended double-precision floating-
4618| point value `a' to the 32-bit two's complement integer format. The
4619| conversion is performed according to the IEC/IEEE Standard for Binary
4620| Floating-Point Arithmetic, except that the conversion is always rounded
4621| toward zero. If `a' is a NaN, the largest positive integer is returned.
4622| Otherwise, if the conversion overflows, the largest integer with the same
4623| sign as `a' is returned.
4624*----------------------------------------------------------------------------*/
4625
4626int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
4627{
4628 flag aSign;
4629 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004630 uint64_t aSig, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01004631 int32_t z;
bellard158142c2005-03-13 16:54:06 +00004632
4633 aSig = extractFloatx80Frac( a );
4634 aExp = extractFloatx80Exp( a );
4635 aSign = extractFloatx80Sign( a );
4636 if ( 0x401E < aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004637 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
bellard158142c2005-03-13 16:54:06 +00004638 goto invalid;
4639 }
4640 else if ( aExp < 0x3FFF ) {
4641 if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4642 return 0;
4643 }
4644 shiftCount = 0x403E - aExp;
4645 savedASig = aSig;
4646 aSig >>= shiftCount;
4647 z = aSig;
4648 if ( aSign ) z = - z;
4649 if ( ( z < 0 ) ^ aSign ) {
4650 invalid:
4651 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01004652 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00004653 }
4654 if ( ( aSig<<shiftCount ) != savedASig ) {
4655 STATUS(float_exception_flags) |= float_flag_inexact;
4656 }
4657 return z;
4658
4659}
4660
4661/*----------------------------------------------------------------------------
4662| Returns the result of converting the extended double-precision floating-
4663| point value `a' to the 64-bit two's complement integer format. The
4664| conversion is performed according to the IEC/IEEE Standard for Binary
4665| Floating-Point Arithmetic---which means in particular that the conversion
4666| is rounded according to the current rounding mode. If `a' is a NaN,
4667| the largest positive integer is returned. Otherwise, if the conversion
4668| overflows, the largest integer with the same sign as `a' is returned.
4669*----------------------------------------------------------------------------*/
4670
4671int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
4672{
4673 flag aSign;
4674 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004675 uint64_t aSig, aSigExtra;
bellard158142c2005-03-13 16:54:06 +00004676
4677 aSig = extractFloatx80Frac( a );
4678 aExp = extractFloatx80Exp( a );
4679 aSign = extractFloatx80Sign( a );
4680 shiftCount = 0x403E - aExp;
4681 if ( shiftCount <= 0 ) {
4682 if ( shiftCount ) {
4683 float_raise( float_flag_invalid STATUS_VAR);
4684 if ( ! aSign
4685 || ( ( aExp == 0x7FFF )
4686 && ( aSig != LIT64( 0x8000000000000000 ) ) )
4687 ) {
4688 return LIT64( 0x7FFFFFFFFFFFFFFF );
4689 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004690 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004691 }
4692 aSigExtra = 0;
4693 }
4694 else {
4695 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4696 }
4697 return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
4698
4699}
4700
4701/*----------------------------------------------------------------------------
4702| Returns the result of converting the extended double-precision floating-
4703| point value `a' to the 64-bit two's complement integer format. The
4704| conversion is performed according to the IEC/IEEE Standard for Binary
4705| Floating-Point Arithmetic, except that the conversion is always rounded
4706| toward zero. If `a' is a NaN, the largest positive integer is returned.
4707| Otherwise, if the conversion overflows, the largest integer with the same
4708| sign as `a' is returned.
4709*----------------------------------------------------------------------------*/
4710
4711int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
4712{
4713 flag aSign;
4714 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004715 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004716 int64 z;
4717
4718 aSig = extractFloatx80Frac( a );
4719 aExp = extractFloatx80Exp( a );
4720 aSign = extractFloatx80Sign( a );
4721 shiftCount = aExp - 0x403E;
4722 if ( 0 <= shiftCount ) {
4723 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4724 if ( ( a.high != 0xC03E ) || aSig ) {
4725 float_raise( float_flag_invalid STATUS_VAR);
4726 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4727 return LIT64( 0x7FFFFFFFFFFFFFFF );
4728 }
4729 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01004730 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00004731 }
4732 else if ( aExp < 0x3FFF ) {
4733 if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
4734 return 0;
4735 }
4736 z = aSig>>( - shiftCount );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004737 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00004738 STATUS(float_exception_flags) |= float_flag_inexact;
4739 }
4740 if ( aSign ) z = - z;
4741 return z;
4742
4743}
4744
4745/*----------------------------------------------------------------------------
4746| Returns the result of converting the extended double-precision floating-
4747| point value `a' to the single-precision floating-point format. The
4748| conversion is performed according to the IEC/IEEE Standard for Binary
4749| Floating-Point Arithmetic.
4750*----------------------------------------------------------------------------*/
4751
4752float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
4753{
4754 flag aSign;
4755 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004756 uint64_t aSig;
bellard158142c2005-03-13 16:54:06 +00004757
4758 aSig = extractFloatx80Frac( a );
4759 aExp = extractFloatx80Exp( a );
4760 aSign = extractFloatx80Sign( a );
4761 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004762 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004763 return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004764 }
4765 return packFloat32( aSign, 0xFF, 0 );
4766 }
4767 shift64RightJamming( aSig, 33, &aSig );
4768 if ( aExp || aSig ) aExp -= 0x3F81;
4769 return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
4770
4771}
4772
4773/*----------------------------------------------------------------------------
4774| Returns the result of converting the extended double-precision floating-
4775| point value `a' to the double-precision floating-point format. The
4776| conversion is performed according to the IEC/IEEE Standard for Binary
4777| Floating-Point Arithmetic.
4778*----------------------------------------------------------------------------*/
4779
4780float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
4781{
4782 flag aSign;
4783 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004784 uint64_t aSig, zSig;
bellard158142c2005-03-13 16:54:06 +00004785
4786 aSig = extractFloatx80Frac( a );
4787 aExp = extractFloatx80Exp( a );
4788 aSign = extractFloatx80Sign( a );
4789 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004790 if ( (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004791 return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004792 }
4793 return packFloat64( aSign, 0x7FF, 0 );
4794 }
4795 shift64RightJamming( aSig, 1, &zSig );
4796 if ( aExp || aSig ) aExp -= 0x3C01;
4797 return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
4798
4799}
4800
bellard158142c2005-03-13 16:54:06 +00004801/*----------------------------------------------------------------------------
4802| Returns the result of converting the extended double-precision floating-
4803| point value `a' to the quadruple-precision floating-point format. The
4804| conversion is performed according to the IEC/IEEE Standard for Binary
4805| Floating-Point Arithmetic.
4806*----------------------------------------------------------------------------*/
4807
4808float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
4809{
4810 flag aSign;
Andreas Färber94a49d82012-04-26 00:15:56 +02004811 int_fast16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004812 uint64_t aSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004813
4814 aSig = extractFloatx80Frac( a );
4815 aExp = extractFloatx80Exp( a );
4816 aSign = extractFloatx80Sign( a );
Andreas Färberbb98fe42011-03-07 01:34:06 +01004817 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00004818 return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004819 }
4820 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4821 return packFloat128( aSign, aExp, zSig0, zSig1 );
4822
4823}
4824
bellard158142c2005-03-13 16:54:06 +00004825/*----------------------------------------------------------------------------
4826| Rounds the extended double-precision floating-point value `a' to an integer,
4827| and returns the result as an extended quadruple-precision floating-point
4828| value. The operation is performed according to the IEC/IEEE Standard for
4829| Binary Floating-Point Arithmetic.
4830*----------------------------------------------------------------------------*/
4831
4832floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
4833{
4834 flag aSign;
4835 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004836 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00004837 floatx80 z;
4838
4839 aExp = extractFloatx80Exp( a );
4840 if ( 0x403E <= aExp ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004841 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004842 return propagateFloatx80NaN( a, a STATUS_VAR );
4843 }
4844 return a;
4845 }
4846 if ( aExp < 0x3FFF ) {
4847 if ( ( aExp == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01004848 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
bellard158142c2005-03-13 16:54:06 +00004849 return a;
4850 }
4851 STATUS(float_exception_flags) |= float_flag_inexact;
4852 aSign = extractFloatx80Sign( a );
4853 switch ( STATUS(float_rounding_mode) ) {
4854 case float_round_nearest_even:
Andreas Färberbb98fe42011-03-07 01:34:06 +01004855 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
bellard158142c2005-03-13 16:54:06 +00004856 ) {
4857 return
4858 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4859 }
4860 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00004861 case float_round_ties_away:
4862 if (aExp == 0x3FFE) {
4863 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4864 }
4865 break;
bellard158142c2005-03-13 16:54:06 +00004866 case float_round_down:
4867 return
4868 aSign ?
4869 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4870 : packFloatx80( 0, 0, 0 );
4871 case float_round_up:
4872 return
4873 aSign ? packFloatx80( 1, 0, 0 )
4874 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4875 }
4876 return packFloatx80( aSign, 0, 0 );
4877 }
4878 lastBitMask = 1;
4879 lastBitMask <<= 0x403E - aExp;
4880 roundBitsMask = lastBitMask - 1;
4881 z = a;
Peter Maydelldc355b72014-01-07 17:19:12 +00004882 switch (STATUS(float_rounding_mode)) {
4883 case float_round_nearest_even:
bellard158142c2005-03-13 16:54:06 +00004884 z.low += lastBitMask>>1;
Peter Maydelldc355b72014-01-07 17:19:12 +00004885 if ((z.low & roundBitsMask) == 0) {
4886 z.low &= ~lastBitMask;
4887 }
4888 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00004889 case float_round_ties_away:
4890 z.low += lastBitMask >> 1;
4891 break;
Peter Maydelldc355b72014-01-07 17:19:12 +00004892 case float_round_to_zero:
4893 break;
4894 case float_round_up:
4895 if (!extractFloatx80Sign(z)) {
bellard158142c2005-03-13 16:54:06 +00004896 z.low += roundBitsMask;
4897 }
Peter Maydelldc355b72014-01-07 17:19:12 +00004898 break;
4899 case float_round_down:
4900 if (extractFloatx80Sign(z)) {
4901 z.low += roundBitsMask;
4902 }
4903 break;
4904 default:
4905 abort();
bellard158142c2005-03-13 16:54:06 +00004906 }
4907 z.low &= ~ roundBitsMask;
4908 if ( z.low == 0 ) {
4909 ++z.high;
4910 z.low = LIT64( 0x8000000000000000 );
4911 }
4912 if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
4913 return z;
4914
4915}
4916
4917/*----------------------------------------------------------------------------
4918| Returns the result of adding the absolute values of the extended double-
4919| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4920| negated before being returned. `zSign' is ignored if the result is a NaN.
4921| The addition is performed according to the IEC/IEEE Standard for Binary
4922| Floating-Point Arithmetic.
4923*----------------------------------------------------------------------------*/
4924
4925static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
4926{
4927 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004928 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004929 int32 expDiff;
4930
4931 aSig = extractFloatx80Frac( a );
4932 aExp = extractFloatx80Exp( a );
4933 bSig = extractFloatx80Frac( b );
4934 bExp = extractFloatx80Exp( b );
4935 expDiff = aExp - bExp;
4936 if ( 0 < expDiff ) {
4937 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004938 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004939 return a;
4940 }
4941 if ( bExp == 0 ) --expDiff;
4942 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4943 zExp = aExp;
4944 }
4945 else if ( expDiff < 0 ) {
4946 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004947 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00004948 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4949 }
4950 if ( aExp == 0 ) ++expDiff;
4951 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4952 zExp = bExp;
4953 }
4954 else {
4955 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01004956 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00004957 return propagateFloatx80NaN( a, b STATUS_VAR );
4958 }
4959 return a;
4960 }
4961 zSig1 = 0;
4962 zSig0 = aSig + bSig;
4963 if ( aExp == 0 ) {
4964 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4965 goto roundAndPack;
4966 }
4967 zExp = aExp;
4968 goto shiftRight1;
4969 }
4970 zSig0 = aSig + bSig;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004971 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
bellard158142c2005-03-13 16:54:06 +00004972 shiftRight1:
4973 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4974 zSig0 |= LIT64( 0x8000000000000000 );
4975 ++zExp;
4976 roundAndPack:
4977 return
4978 roundAndPackFloatx80(
4979 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
4980
4981}
4982
4983/*----------------------------------------------------------------------------
4984| Returns the result of subtracting the absolute values of the extended
4985| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4986| difference is negated before being returned. `zSign' is ignored if the
4987| result is a NaN. The subtraction is performed according to the IEC/IEEE
4988| Standard for Binary Floating-Point Arithmetic.
4989*----------------------------------------------------------------------------*/
4990
4991static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
4992{
4993 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01004994 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00004995 int32 expDiff;
4996 floatx80 z;
4997
4998 aSig = extractFloatx80Frac( a );
4999 aExp = extractFloatx80Exp( a );
5000 bSig = extractFloatx80Frac( b );
5001 bExp = extractFloatx80Exp( b );
5002 expDiff = aExp - bExp;
5003 if ( 0 < expDiff ) goto aExpBigger;
5004 if ( expDiff < 0 ) goto bExpBigger;
5005 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005006 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
bellard158142c2005-03-13 16:54:06 +00005007 return propagateFloatx80NaN( a, b STATUS_VAR );
5008 }
5009 float_raise( float_flag_invalid STATUS_VAR);
5010 z.low = floatx80_default_nan_low;
5011 z.high = floatx80_default_nan_high;
5012 return z;
5013 }
5014 if ( aExp == 0 ) {
5015 aExp = 1;
5016 bExp = 1;
5017 }
5018 zSig1 = 0;
5019 if ( bSig < aSig ) goto aBigger;
5020 if ( aSig < bSig ) goto bBigger;
5021 return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
5022 bExpBigger:
5023 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005024 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005025 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
5026 }
5027 if ( aExp == 0 ) ++expDiff;
5028 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
5029 bBigger:
5030 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
5031 zExp = bExp;
5032 zSign ^= 1;
5033 goto normalizeRoundAndPack;
5034 aExpBigger:
5035 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005036 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005037 return a;
5038 }
5039 if ( bExp == 0 ) --expDiff;
5040 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
5041 aBigger:
5042 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
5043 zExp = aExp;
5044 normalizeRoundAndPack:
5045 return
5046 normalizeRoundAndPackFloatx80(
5047 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
5048
5049}
5050
5051/*----------------------------------------------------------------------------
5052| Returns the result of adding the extended double-precision floating-point
5053| values `a' and `b'. The operation is performed according to the IEC/IEEE
5054| Standard for Binary Floating-Point Arithmetic.
5055*----------------------------------------------------------------------------*/
5056
5057floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
5058{
5059 flag aSign, bSign;
5060
5061 aSign = extractFloatx80Sign( a );
5062 bSign = extractFloatx80Sign( b );
5063 if ( aSign == bSign ) {
5064 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
5065 }
5066 else {
5067 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
5068 }
5069
5070}
5071
5072/*----------------------------------------------------------------------------
5073| Returns the result of subtracting the extended double-precision floating-
5074| point values `a' and `b'. The operation is performed according to the
5075| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5076*----------------------------------------------------------------------------*/
5077
5078floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
5079{
5080 flag aSign, bSign;
5081
5082 aSign = extractFloatx80Sign( a );
5083 bSign = extractFloatx80Sign( b );
5084 if ( aSign == bSign ) {
5085 return subFloatx80Sigs( a, b, aSign STATUS_VAR );
5086 }
5087 else {
5088 return addFloatx80Sigs( a, b, aSign STATUS_VAR );
5089 }
5090
5091}
5092
5093/*----------------------------------------------------------------------------
5094| Returns the result of multiplying the extended double-precision floating-
5095| point values `a' and `b'. The operation is performed according to the
5096| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5097*----------------------------------------------------------------------------*/
5098
5099floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
5100{
5101 flag aSign, bSign, zSign;
5102 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005103 uint64_t aSig, bSig, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00005104 floatx80 z;
5105
5106 aSig = extractFloatx80Frac( a );
5107 aExp = extractFloatx80Exp( a );
5108 aSign = extractFloatx80Sign( a );
5109 bSig = extractFloatx80Frac( b );
5110 bExp = extractFloatx80Exp( b );
5111 bSign = extractFloatx80Sign( b );
5112 zSign = aSign ^ bSign;
5113 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005114 if ( (uint64_t) ( aSig<<1 )
5115 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00005116 return propagateFloatx80NaN( a, b STATUS_VAR );
5117 }
5118 if ( ( bExp | bSig ) == 0 ) goto invalid;
5119 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5120 }
5121 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005122 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005123 if ( ( aExp | aSig ) == 0 ) {
5124 invalid:
5125 float_raise( float_flag_invalid STATUS_VAR);
5126 z.low = floatx80_default_nan_low;
5127 z.high = floatx80_default_nan_high;
5128 return z;
5129 }
5130 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5131 }
5132 if ( aExp == 0 ) {
5133 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5134 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5135 }
5136 if ( bExp == 0 ) {
5137 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
5138 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5139 }
5140 zExp = aExp + bExp - 0x3FFE;
5141 mul64To128( aSig, bSig, &zSig0, &zSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005142 if ( 0 < (int64_t) zSig0 ) {
bellard158142c2005-03-13 16:54:06 +00005143 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
5144 --zExp;
5145 }
5146 return
5147 roundAndPackFloatx80(
5148 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
5149
5150}
5151
5152/*----------------------------------------------------------------------------
5153| Returns the result of dividing the extended double-precision floating-point
5154| value `a' by the corresponding value `b'. The operation is performed
5155| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5156*----------------------------------------------------------------------------*/
5157
5158floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
5159{
5160 flag aSign, bSign, zSign;
5161 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005162 uint64_t aSig, bSig, zSig0, zSig1;
5163 uint64_t rem0, rem1, rem2, term0, term1, term2;
bellard158142c2005-03-13 16:54:06 +00005164 floatx80 z;
5165
5166 aSig = extractFloatx80Frac( a );
5167 aExp = extractFloatx80Exp( a );
5168 aSign = extractFloatx80Sign( a );
5169 bSig = extractFloatx80Frac( b );
5170 bExp = extractFloatx80Exp( b );
5171 bSign = extractFloatx80Sign( b );
5172 zSign = aSign ^ bSign;
5173 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005174 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005175 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005176 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005177 goto invalid;
5178 }
5179 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5180 }
5181 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005182 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005183 return packFloatx80( zSign, 0, 0 );
5184 }
5185 if ( bExp == 0 ) {
5186 if ( bSig == 0 ) {
5187 if ( ( aExp | aSig ) == 0 ) {
5188 invalid:
5189 float_raise( float_flag_invalid STATUS_VAR);
5190 z.low = floatx80_default_nan_low;
5191 z.high = floatx80_default_nan_high;
5192 return z;
5193 }
5194 float_raise( float_flag_divbyzero STATUS_VAR);
5195 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5196 }
5197 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5198 }
5199 if ( aExp == 0 ) {
5200 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
5201 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
5202 }
5203 zExp = aExp - bExp + 0x3FFE;
5204 rem1 = 0;
5205 if ( bSig <= aSig ) {
5206 shift128Right( aSig, 0, 1, &aSig, &rem1 );
5207 ++zExp;
5208 }
5209 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
5210 mul64To128( bSig, zSig0, &term0, &term1 );
5211 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005212 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005213 --zSig0;
5214 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
5215 }
5216 zSig1 = estimateDiv128To64( rem1, 0, bSig );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005217 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
bellard158142c2005-03-13 16:54:06 +00005218 mul64To128( bSig, zSig1, &term1, &term2 );
5219 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005220 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005221 --zSig1;
5222 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
5223 }
5224 zSig1 |= ( ( rem1 | rem2 ) != 0 );
5225 }
5226 return
5227 roundAndPackFloatx80(
5228 STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
5229
5230}
5231
5232/*----------------------------------------------------------------------------
5233| Returns the remainder of the extended double-precision floating-point value
5234| `a' with respect to the corresponding value `b'. The operation is performed
5235| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5236*----------------------------------------------------------------------------*/
5237
5238floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
5239{
Blue Swirled086f32010-03-07 13:49:58 +00005240 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00005241 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005242 uint64_t aSig0, aSig1, bSig;
5243 uint64_t q, term0, term1, alternateASig0, alternateASig1;
bellard158142c2005-03-13 16:54:06 +00005244 floatx80 z;
5245
5246 aSig0 = extractFloatx80Frac( a );
5247 aExp = extractFloatx80Exp( a );
5248 aSign = extractFloatx80Sign( a );
5249 bSig = extractFloatx80Frac( b );
5250 bExp = extractFloatx80Exp( b );
bellard158142c2005-03-13 16:54:06 +00005251 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005252 if ( (uint64_t) ( aSig0<<1 )
5253 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
bellard158142c2005-03-13 16:54:06 +00005254 return propagateFloatx80NaN( a, b STATUS_VAR );
5255 }
5256 goto invalid;
5257 }
5258 if ( bExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005259 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005260 return a;
5261 }
5262 if ( bExp == 0 ) {
5263 if ( bSig == 0 ) {
5264 invalid:
5265 float_raise( float_flag_invalid STATUS_VAR);
5266 z.low = floatx80_default_nan_low;
5267 z.high = floatx80_default_nan_high;
5268 return z;
5269 }
5270 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5271 }
5272 if ( aExp == 0 ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005273 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00005274 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5275 }
5276 bSig |= LIT64( 0x8000000000000000 );
5277 zSign = aSign;
5278 expDiff = aExp - bExp;
5279 aSig1 = 0;
5280 if ( expDiff < 0 ) {
5281 if ( expDiff < -1 ) return a;
5282 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5283 expDiff = 0;
5284 }
5285 q = ( bSig <= aSig0 );
5286 if ( q ) aSig0 -= bSig;
5287 expDiff -= 64;
5288 while ( 0 < expDiff ) {
5289 q = estimateDiv128To64( aSig0, aSig1, bSig );
5290 q = ( 2 < q ) ? q - 2 : 0;
5291 mul64To128( bSig, q, &term0, &term1 );
5292 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5293 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5294 expDiff -= 62;
5295 }
5296 expDiff += 64;
5297 if ( 0 < expDiff ) {
5298 q = estimateDiv128To64( aSig0, aSig1, bSig );
5299 q = ( 2 < q ) ? q - 2 : 0;
5300 q >>= 64 - expDiff;
5301 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5302 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5303 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5304 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5305 ++q;
5306 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5307 }
5308 }
5309 else {
5310 term1 = 0;
5311 term0 = bSig;
5312 }
5313 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5314 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5315 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5316 && ( q & 1 ) )
5317 ) {
5318 aSig0 = alternateASig0;
5319 aSig1 = alternateASig1;
5320 zSign = ! zSign;
5321 }
5322 return
5323 normalizeRoundAndPackFloatx80(
5324 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
5325
5326}
5327
5328/*----------------------------------------------------------------------------
5329| Returns the square root of the extended double-precision floating-point
5330| value `a'. The operation is performed according to the IEC/IEEE Standard
5331| for Binary Floating-Point Arithmetic.
5332*----------------------------------------------------------------------------*/
5333
5334floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
5335{
5336 flag aSign;
5337 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005338 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5339 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00005340 floatx80 z;
5341
5342 aSig0 = extractFloatx80Frac( a );
5343 aExp = extractFloatx80Exp( a );
5344 aSign = extractFloatx80Sign( a );
5345 if ( aExp == 0x7FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005346 if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005347 if ( ! aSign ) return a;
5348 goto invalid;
5349 }
5350 if ( aSign ) {
5351 if ( ( aExp | aSig0 ) == 0 ) return a;
5352 invalid:
5353 float_raise( float_flag_invalid STATUS_VAR);
5354 z.low = floatx80_default_nan_low;
5355 z.high = floatx80_default_nan_high;
5356 return z;
5357 }
5358 if ( aExp == 0 ) {
5359 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5360 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5361 }
5362 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5363 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5364 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5365 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5366 doubleZSig0 = zSig0<<1;
5367 mul64To128( zSig0, zSig0, &term0, &term1 );
5368 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005369 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005370 --zSig0;
5371 doubleZSig0 -= 2;
5372 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5373 }
5374 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5375 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5376 if ( zSig1 == 0 ) zSig1 = 1;
5377 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5378 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5379 mul64To128( zSig1, zSig1, &term2, &term3 );
5380 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005381 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005382 --zSig1;
5383 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5384 term3 |= 1;
5385 term2 |= doubleZSig0;
5386 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5387 }
5388 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5389 }
5390 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5391 zSig0 |= doubleZSig0;
5392 return
5393 roundAndPackFloatx80(
5394 STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
5395
5396}
5397
5398/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005399| Returns 1 if the extended double-precision floating-point value `a' is equal
5400| to the corresponding value `b', and 0 otherwise. The invalid exception is
5401| raised if either operand is a NaN. Otherwise, the comparison is performed
5402| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005403*----------------------------------------------------------------------------*/
5404
Aurelien Jarnob6893622011-04-14 00:49:29 +02005405int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005406{
5407
5408 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005409 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005410 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005411 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005412 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005413 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00005414 return 0;
5415 }
5416 return
5417 ( a.low == b.low )
5418 && ( ( a.high == b.high )
5419 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005420 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005421 );
5422
5423}
5424
5425/*----------------------------------------------------------------------------
5426| Returns 1 if the extended double-precision floating-point value `a' is
5427| less than or equal to the corresponding value `b', and 0 otherwise. The
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005428| invalid exception is raised if either operand is a NaN. The comparison is
5429| performed according to the IEC/IEEE Standard for Binary Floating-Point
5430| Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005431*----------------------------------------------------------------------------*/
5432
bellard750afe92006-10-28 19:27:11 +00005433int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005434{
5435 flag aSign, bSign;
5436
5437 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005438 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005439 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005440 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005441 ) {
5442 float_raise( float_flag_invalid STATUS_VAR);
5443 return 0;
5444 }
5445 aSign = extractFloatx80Sign( a );
5446 bSign = extractFloatx80Sign( b );
5447 if ( aSign != bSign ) {
5448 return
5449 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005450 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005451 == 0 );
5452 }
5453 return
5454 aSign ? le128( b.high, b.low, a.high, a.low )
5455 : le128( a.high, a.low, b.high, b.low );
5456
5457}
5458
5459/*----------------------------------------------------------------------------
5460| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005461| less than the corresponding value `b', and 0 otherwise. The invalid
5462| exception is raised if either operand is a NaN. The comparison is performed
5463| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005464*----------------------------------------------------------------------------*/
5465
bellard750afe92006-10-28 19:27:11 +00005466int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005467{
5468 flag aSign, bSign;
5469
5470 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005471 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005472 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005473 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005474 ) {
5475 float_raise( float_flag_invalid STATUS_VAR);
5476 return 0;
5477 }
5478 aSign = extractFloatx80Sign( a );
5479 bSign = extractFloatx80Sign( b );
5480 if ( aSign != bSign ) {
5481 return
5482 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005483 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005484 != 0 );
5485 }
5486 return
5487 aSign ? lt128( b.high, b.low, a.high, a.low )
5488 : lt128( a.high, a.low, b.high, b.low );
5489
5490}
5491
5492/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02005493| Returns 1 if the extended double-precision floating-point values `a' and `b'
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005494| cannot be compared, and 0 otherwise. The invalid exception is raised if
5495| either operand is a NaN. The comparison is performed according to the
5496| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02005497*----------------------------------------------------------------------------*/
5498int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
5499{
5500 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5501 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5502 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5503 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5504 ) {
5505 float_raise( float_flag_invalid STATUS_VAR);
5506 return 1;
5507 }
5508 return 0;
5509}
5510
5511/*----------------------------------------------------------------------------
Aurelien Jarnob6893622011-04-14 00:49:29 +02005512| Returns 1 if the extended double-precision floating-point value `a' is
Aurelien Jarnof5a64252011-04-14 00:49:30 +02005513| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5514| cause an exception. The comparison is performed according to the IEC/IEEE
5515| Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00005516*----------------------------------------------------------------------------*/
5517
Aurelien Jarnob6893622011-04-14 00:49:29 +02005518int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005519{
5520
5521 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005522 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005523 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005524 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005525 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02005526 if ( floatx80_is_signaling_nan( a )
5527 || floatx80_is_signaling_nan( b ) ) {
5528 float_raise( float_flag_invalid STATUS_VAR);
5529 }
bellard158142c2005-03-13 16:54:06 +00005530 return 0;
5531 }
5532 return
5533 ( a.low == b.low )
5534 && ( ( a.high == b.high )
5535 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005536 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00005537 );
5538
5539}
5540
5541/*----------------------------------------------------------------------------
5542| Returns 1 if the extended double-precision floating-point value `a' is less
5543| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5544| do not cause an exception. Otherwise, the comparison is performed according
5545| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5546*----------------------------------------------------------------------------*/
5547
bellard750afe92006-10-28 19:27:11 +00005548int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005549{
5550 flag aSign, bSign;
5551
5552 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005553 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005554 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005555 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005556 ) {
5557 if ( floatx80_is_signaling_nan( a )
5558 || floatx80_is_signaling_nan( b ) ) {
5559 float_raise( float_flag_invalid STATUS_VAR);
5560 }
5561 return 0;
5562 }
5563 aSign = extractFloatx80Sign( a );
5564 bSign = extractFloatx80Sign( b );
5565 if ( aSign != bSign ) {
5566 return
5567 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005568 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005569 == 0 );
5570 }
5571 return
5572 aSign ? le128( b.high, b.low, a.high, a.low )
5573 : le128( a.high, a.low, b.high, b.low );
5574
5575}
5576
5577/*----------------------------------------------------------------------------
5578| Returns 1 if the extended double-precision floating-point value `a' is less
5579| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5580| an exception. Otherwise, the comparison is performed according to the
5581| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5582*----------------------------------------------------------------------------*/
5583
bellard750afe92006-10-28 19:27:11 +00005584int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00005585{
5586 flag aSign, bSign;
5587
5588 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005589 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005590 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
Andreas Färberbb98fe42011-03-07 01:34:06 +01005591 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
bellard158142c2005-03-13 16:54:06 +00005592 ) {
5593 if ( floatx80_is_signaling_nan( a )
5594 || floatx80_is_signaling_nan( b ) ) {
5595 float_raise( float_flag_invalid STATUS_VAR);
5596 }
5597 return 0;
5598 }
5599 aSign = extractFloatx80Sign( a );
5600 bSign = extractFloatx80Sign( b );
5601 if ( aSign != bSign ) {
5602 return
5603 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01005604 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00005605 != 0 );
5606 }
5607 return
5608 aSign ? lt128( b.high, b.low, a.high, a.low )
5609 : lt128( a.high, a.low, b.high, b.low );
5610
5611}
5612
Aurelien Jarno67b78612011-04-14 00:49:29 +02005613/*----------------------------------------------------------------------------
5614| Returns 1 if the extended double-precision floating-point values `a' and `b'
5615| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5616| The comparison is performed according to the IEC/IEEE Standard for Binary
5617| Floating-Point Arithmetic.
5618*----------------------------------------------------------------------------*/
5619int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
5620{
5621 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5622 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5623 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5624 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5625 ) {
5626 if ( floatx80_is_signaling_nan( a )
5627 || floatx80_is_signaling_nan( b ) ) {
5628 float_raise( float_flag_invalid STATUS_VAR);
5629 }
5630 return 1;
5631 }
5632 return 0;
5633}
5634
bellard158142c2005-03-13 16:54:06 +00005635/*----------------------------------------------------------------------------
5636| Returns the result of converting the quadruple-precision floating-point
5637| value `a' to the 32-bit two's complement integer format. The conversion
5638| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5639| Arithmetic---which means in particular that the conversion is rounded
5640| according to the current rounding mode. If `a' is a NaN, the largest
5641| positive integer is returned. Otherwise, if the conversion overflows, the
5642| largest integer with the same sign as `a' is returned.
5643*----------------------------------------------------------------------------*/
5644
5645int32 float128_to_int32( float128 a STATUS_PARAM )
5646{
5647 flag aSign;
5648 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005649 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005650
5651 aSig1 = extractFloat128Frac1( a );
5652 aSig0 = extractFloat128Frac0( a );
5653 aExp = extractFloat128Exp( a );
5654 aSign = extractFloat128Sign( a );
5655 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5656 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5657 aSig0 |= ( aSig1 != 0 );
5658 shiftCount = 0x4028 - aExp;
5659 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
5660 return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
5661
5662}
5663
5664/*----------------------------------------------------------------------------
5665| Returns the result of converting the quadruple-precision floating-point
5666| value `a' to the 32-bit two's complement integer format. The conversion
5667| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5668| Arithmetic, except that the conversion is always rounded toward zero. If
5669| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5670| conversion overflows, the largest integer with the same sign as `a' is
5671| returned.
5672*----------------------------------------------------------------------------*/
5673
5674int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
5675{
5676 flag aSign;
5677 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005678 uint64_t aSig0, aSig1, savedASig;
Peter Maydellb3a6a2e2012-04-05 19:12:34 +01005679 int32_t z;
bellard158142c2005-03-13 16:54:06 +00005680
5681 aSig1 = extractFloat128Frac1( a );
5682 aSig0 = extractFloat128Frac0( a );
5683 aExp = extractFloat128Exp( a );
5684 aSign = extractFloat128Sign( a );
5685 aSig0 |= ( aSig1 != 0 );
5686 if ( 0x401E < aExp ) {
5687 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5688 goto invalid;
5689 }
5690 else if ( aExp < 0x3FFF ) {
5691 if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
5692 return 0;
5693 }
5694 aSig0 |= LIT64( 0x0001000000000000 );
5695 shiftCount = 0x402F - aExp;
5696 savedASig = aSig0;
5697 aSig0 >>= shiftCount;
5698 z = aSig0;
5699 if ( aSign ) z = - z;
5700 if ( ( z < 0 ) ^ aSign ) {
5701 invalid:
5702 float_raise( float_flag_invalid STATUS_VAR);
Andreas Färberbb98fe42011-03-07 01:34:06 +01005703 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
bellard158142c2005-03-13 16:54:06 +00005704 }
5705 if ( ( aSig0<<shiftCount ) != savedASig ) {
5706 STATUS(float_exception_flags) |= float_flag_inexact;
5707 }
5708 return z;
5709
5710}
5711
5712/*----------------------------------------------------------------------------
5713| Returns the result of converting the quadruple-precision floating-point
5714| value `a' to the 64-bit two's complement integer format. The conversion
5715| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5716| Arithmetic---which means in particular that the conversion is rounded
5717| according to the current rounding mode. If `a' is a NaN, the largest
5718| positive integer is returned. Otherwise, if the conversion overflows, the
5719| largest integer with the same sign as `a' is returned.
5720*----------------------------------------------------------------------------*/
5721
5722int64 float128_to_int64( float128 a STATUS_PARAM )
5723{
5724 flag aSign;
5725 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005726 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005727
5728 aSig1 = extractFloat128Frac1( a );
5729 aSig0 = extractFloat128Frac0( a );
5730 aExp = extractFloat128Exp( a );
5731 aSign = extractFloat128Sign( a );
5732 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5733 shiftCount = 0x402F - aExp;
5734 if ( shiftCount <= 0 ) {
5735 if ( 0x403E < aExp ) {
5736 float_raise( float_flag_invalid STATUS_VAR);
5737 if ( ! aSign
5738 || ( ( aExp == 0x7FFF )
5739 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5740 )
5741 ) {
5742 return LIT64( 0x7FFFFFFFFFFFFFFF );
5743 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005744 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005745 }
5746 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5747 }
5748 else {
5749 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5750 }
5751 return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
5752
5753}
5754
5755/*----------------------------------------------------------------------------
5756| Returns the result of converting the quadruple-precision floating-point
5757| value `a' to the 64-bit two's complement integer format. The conversion
5758| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5759| Arithmetic, except that the conversion is always rounded toward zero.
5760| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5761| the conversion overflows, the largest integer with the same sign as `a' is
5762| returned.
5763*----------------------------------------------------------------------------*/
5764
5765int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
5766{
5767 flag aSign;
5768 int32 aExp, shiftCount;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005769 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005770 int64 z;
5771
5772 aSig1 = extractFloat128Frac1( a );
5773 aSig0 = extractFloat128Frac0( a );
5774 aExp = extractFloat128Exp( a );
5775 aSign = extractFloat128Sign( a );
5776 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5777 shiftCount = aExp - 0x402F;
5778 if ( 0 < shiftCount ) {
5779 if ( 0x403E <= aExp ) {
5780 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5781 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5782 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
5783 if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
5784 }
5785 else {
5786 float_raise( float_flag_invalid STATUS_VAR);
5787 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5788 return LIT64( 0x7FFFFFFFFFFFFFFF );
5789 }
5790 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01005791 return (int64_t) LIT64( 0x8000000000000000 );
bellard158142c2005-03-13 16:54:06 +00005792 }
5793 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
Andreas Färberbb98fe42011-03-07 01:34:06 +01005794 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
bellard158142c2005-03-13 16:54:06 +00005795 STATUS(float_exception_flags) |= float_flag_inexact;
5796 }
5797 }
5798 else {
5799 if ( aExp < 0x3FFF ) {
5800 if ( aExp | aSig0 | aSig1 ) {
5801 STATUS(float_exception_flags) |= float_flag_inexact;
5802 }
5803 return 0;
5804 }
5805 z = aSig0>>( - shiftCount );
5806 if ( aSig1
Andreas Färberbb98fe42011-03-07 01:34:06 +01005807 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
bellard158142c2005-03-13 16:54:06 +00005808 STATUS(float_exception_flags) |= float_flag_inexact;
5809 }
5810 }
5811 if ( aSign ) z = - z;
5812 return z;
5813
5814}
5815
5816/*----------------------------------------------------------------------------
5817| Returns the result of converting the quadruple-precision floating-point
5818| value `a' to the single-precision floating-point format. The conversion
5819| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5820| Arithmetic.
5821*----------------------------------------------------------------------------*/
5822
5823float32 float128_to_float32( float128 a STATUS_PARAM )
5824{
5825 flag aSign;
5826 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005827 uint64_t aSig0, aSig1;
5828 uint32_t zSig;
bellard158142c2005-03-13 16:54:06 +00005829
5830 aSig1 = extractFloat128Frac1( a );
5831 aSig0 = extractFloat128Frac0( a );
5832 aExp = extractFloat128Exp( a );
5833 aSign = extractFloat128Sign( a );
5834 if ( aExp == 0x7FFF ) {
5835 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005836 return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005837 }
5838 return packFloat32( aSign, 0xFF, 0 );
5839 }
5840 aSig0 |= ( aSig1 != 0 );
5841 shift64RightJamming( aSig0, 18, &aSig0 );
5842 zSig = aSig0;
5843 if ( aExp || zSig ) {
5844 zSig |= 0x40000000;
5845 aExp -= 0x3F81;
5846 }
5847 return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
5848
5849}
5850
5851/*----------------------------------------------------------------------------
5852| Returns the result of converting the quadruple-precision floating-point
5853| value `a' to the double-precision floating-point format. The conversion
5854| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5855| Arithmetic.
5856*----------------------------------------------------------------------------*/
5857
5858float64 float128_to_float64( float128 a STATUS_PARAM )
5859{
5860 flag aSign;
5861 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005862 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005863
5864 aSig1 = extractFloat128Frac1( a );
5865 aSig0 = extractFloat128Frac0( a );
5866 aExp = extractFloat128Exp( a );
5867 aSign = extractFloat128Sign( a );
5868 if ( aExp == 0x7FFF ) {
5869 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005870 return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005871 }
5872 return packFloat64( aSign, 0x7FF, 0 );
5873 }
5874 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5875 aSig0 |= ( aSig1 != 0 );
5876 if ( aExp || aSig0 ) {
5877 aSig0 |= LIT64( 0x4000000000000000 );
5878 aExp -= 0x3C01;
5879 }
5880 return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
5881
5882}
5883
bellard158142c2005-03-13 16:54:06 +00005884/*----------------------------------------------------------------------------
5885| Returns the result of converting the quadruple-precision floating-point
5886| value `a' to the extended double-precision floating-point format. The
5887| conversion is performed according to the IEC/IEEE Standard for Binary
5888| Floating-Point Arithmetic.
5889*----------------------------------------------------------------------------*/
5890
5891floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
5892{
5893 flag aSign;
5894 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005895 uint64_t aSig0, aSig1;
bellard158142c2005-03-13 16:54:06 +00005896
5897 aSig1 = extractFloat128Frac1( a );
5898 aSig0 = extractFloat128Frac0( a );
5899 aExp = extractFloat128Exp( a );
5900 aSign = extractFloat128Sign( a );
5901 if ( aExp == 0x7FFF ) {
5902 if ( aSig0 | aSig1 ) {
Christophe Lyonbcd4d9a2011-02-10 11:28:57 +00005903 return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
bellard158142c2005-03-13 16:54:06 +00005904 }
5905 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
5906 }
5907 if ( aExp == 0 ) {
5908 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5909 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5910 }
5911 else {
5912 aSig0 |= LIT64( 0x0001000000000000 );
5913 }
5914 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
5915 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
5916
5917}
5918
bellard158142c2005-03-13 16:54:06 +00005919/*----------------------------------------------------------------------------
5920| Rounds the quadruple-precision floating-point value `a' to an integer, and
5921| returns the result as a quadruple-precision floating-point value. The
5922| operation is performed according to the IEC/IEEE Standard for Binary
5923| Floating-Point Arithmetic.
5924*----------------------------------------------------------------------------*/
5925
5926float128 float128_round_to_int( float128 a STATUS_PARAM )
5927{
5928 flag aSign;
5929 int32 aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005930 uint64_t lastBitMask, roundBitsMask;
bellard158142c2005-03-13 16:54:06 +00005931 float128 z;
5932
5933 aExp = extractFloat128Exp( a );
5934 if ( 0x402F <= aExp ) {
5935 if ( 0x406F <= aExp ) {
5936 if ( ( aExp == 0x7FFF )
5937 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5938 ) {
5939 return propagateFloat128NaN( a, a STATUS_VAR );
5940 }
5941 return a;
5942 }
5943 lastBitMask = 1;
5944 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5945 roundBitsMask = lastBitMask - 1;
5946 z = a;
Peter Maydelldc355b72014-01-07 17:19:12 +00005947 switch (STATUS(float_rounding_mode)) {
5948 case float_round_nearest_even:
bellard158142c2005-03-13 16:54:06 +00005949 if ( lastBitMask ) {
5950 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5951 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5952 }
5953 else {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005954 if ( (int64_t) z.low < 0 ) {
bellard158142c2005-03-13 16:54:06 +00005955 ++z.high;
Andreas Färberbb98fe42011-03-07 01:34:06 +01005956 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
bellard158142c2005-03-13 16:54:06 +00005957 }
5958 }
Peter Maydelldc355b72014-01-07 17:19:12 +00005959 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00005960 case float_round_ties_away:
5961 if (lastBitMask) {
5962 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
5963 } else {
5964 if ((int64_t) z.low < 0) {
5965 ++z.high;
5966 }
5967 }
5968 break;
Peter Maydelldc355b72014-01-07 17:19:12 +00005969 case float_round_to_zero:
5970 break;
5971 case float_round_up:
5972 if (!extractFloat128Sign(z)) {
5973 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
bellard158142c2005-03-13 16:54:06 +00005974 }
Peter Maydelldc355b72014-01-07 17:19:12 +00005975 break;
5976 case float_round_down:
5977 if (extractFloat128Sign(z)) {
5978 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
5979 }
5980 break;
5981 default:
5982 abort();
bellard158142c2005-03-13 16:54:06 +00005983 }
5984 z.low &= ~ roundBitsMask;
5985 }
5986 else {
5987 if ( aExp < 0x3FFF ) {
Andreas Färberbb98fe42011-03-07 01:34:06 +01005988 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
bellard158142c2005-03-13 16:54:06 +00005989 STATUS(float_exception_flags) |= float_flag_inexact;
5990 aSign = extractFloat128Sign( a );
5991 switch ( STATUS(float_rounding_mode) ) {
5992 case float_round_nearest_even:
5993 if ( ( aExp == 0x3FFE )
5994 && ( extractFloat128Frac0( a )
5995 | extractFloat128Frac1( a ) )
5996 ) {
5997 return packFloat128( aSign, 0x3FFF, 0, 0 );
5998 }
5999 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00006000 case float_round_ties_away:
6001 if (aExp == 0x3FFE) {
6002 return packFloat128(aSign, 0x3FFF, 0, 0);
6003 }
6004 break;
bellard158142c2005-03-13 16:54:06 +00006005 case float_round_down:
6006 return
6007 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
6008 : packFloat128( 0, 0, 0, 0 );
6009 case float_round_up:
6010 return
6011 aSign ? packFloat128( 1, 0, 0, 0 )
6012 : packFloat128( 0, 0x3FFF, 0, 0 );
6013 }
6014 return packFloat128( aSign, 0, 0, 0 );
6015 }
6016 lastBitMask = 1;
6017 lastBitMask <<= 0x402F - aExp;
6018 roundBitsMask = lastBitMask - 1;
6019 z.low = 0;
6020 z.high = a.high;
Peter Maydelldc355b72014-01-07 17:19:12 +00006021 switch (STATUS(float_rounding_mode)) {
6022 case float_round_nearest_even:
bellard158142c2005-03-13 16:54:06 +00006023 z.high += lastBitMask>>1;
6024 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
6025 z.high &= ~ lastBitMask;
6026 }
Peter Maydelldc355b72014-01-07 17:19:12 +00006027 break;
Peter Maydellf9288a72014-01-07 17:19:12 +00006028 case float_round_ties_away:
6029 z.high += lastBitMask>>1;
6030 break;
Peter Maydelldc355b72014-01-07 17:19:12 +00006031 case float_round_to_zero:
6032 break;
6033 case float_round_up:
6034 if (!extractFloat128Sign(z)) {
bellard158142c2005-03-13 16:54:06 +00006035 z.high |= ( a.low != 0 );
6036 z.high += roundBitsMask;
6037 }
Peter Maydelldc355b72014-01-07 17:19:12 +00006038 break;
6039 case float_round_down:
6040 if (extractFloat128Sign(z)) {
6041 z.high |= (a.low != 0);
6042 z.high += roundBitsMask;
6043 }
6044 break;
6045 default:
6046 abort();
bellard158142c2005-03-13 16:54:06 +00006047 }
6048 z.high &= ~ roundBitsMask;
6049 }
6050 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
6051 STATUS(float_exception_flags) |= float_flag_inexact;
6052 }
6053 return z;
6054
6055}
6056
6057/*----------------------------------------------------------------------------
6058| Returns the result of adding the absolute values of the quadruple-precision
6059| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
6060| before being returned. `zSign' is ignored if the result is a NaN.
6061| The addition is performed according to the IEC/IEEE Standard for Binary
6062| Floating-Point Arithmetic.
6063*----------------------------------------------------------------------------*/
6064
6065static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
6066{
6067 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006068 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
bellard158142c2005-03-13 16:54:06 +00006069 int32 expDiff;
6070
6071 aSig1 = extractFloat128Frac1( a );
6072 aSig0 = extractFloat128Frac0( a );
6073 aExp = extractFloat128Exp( a );
6074 bSig1 = extractFloat128Frac1( b );
6075 bSig0 = extractFloat128Frac0( b );
6076 bExp = extractFloat128Exp( b );
6077 expDiff = aExp - bExp;
6078 if ( 0 < expDiff ) {
6079 if ( aExp == 0x7FFF ) {
6080 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6081 return a;
6082 }
6083 if ( bExp == 0 ) {
6084 --expDiff;
6085 }
6086 else {
6087 bSig0 |= LIT64( 0x0001000000000000 );
6088 }
6089 shift128ExtraRightJamming(
6090 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
6091 zExp = aExp;
6092 }
6093 else if ( expDiff < 0 ) {
6094 if ( bExp == 0x7FFF ) {
6095 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6096 return packFloat128( zSign, 0x7FFF, 0, 0 );
6097 }
6098 if ( aExp == 0 ) {
6099 ++expDiff;
6100 }
6101 else {
6102 aSig0 |= LIT64( 0x0001000000000000 );
6103 }
6104 shift128ExtraRightJamming(
6105 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
6106 zExp = bExp;
6107 }
6108 else {
6109 if ( aExp == 0x7FFF ) {
6110 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6111 return propagateFloat128NaN( a, b STATUS_VAR );
6112 }
6113 return a;
6114 }
6115 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
pbrookfe76d972008-12-19 14:33:59 +00006116 if ( aExp == 0 ) {
Peter Maydelle6afc872011-05-19 14:46:17 +01006117 if (STATUS(flush_to_zero)) {
6118 if (zSig0 | zSig1) {
6119 float_raise(float_flag_output_denormal STATUS_VAR);
6120 }
6121 return packFloat128(zSign, 0, 0, 0);
6122 }
pbrookfe76d972008-12-19 14:33:59 +00006123 return packFloat128( zSign, 0, zSig0, zSig1 );
6124 }
bellard158142c2005-03-13 16:54:06 +00006125 zSig2 = 0;
6126 zSig0 |= LIT64( 0x0002000000000000 );
6127 zExp = aExp;
6128 goto shiftRight1;
6129 }
6130 aSig0 |= LIT64( 0x0001000000000000 );
6131 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6132 --zExp;
6133 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6134 ++zExp;
6135 shiftRight1:
6136 shift128ExtraRightJamming(
6137 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6138 roundAndPack:
6139 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6140
6141}
6142
6143/*----------------------------------------------------------------------------
6144| Returns the result of subtracting the absolute values of the quadruple-
6145| precision floating-point values `a' and `b'. If `zSign' is 1, the
6146| difference is negated before being returned. `zSign' is ignored if the
6147| result is a NaN. The subtraction is performed according to the IEC/IEEE
6148| Standard for Binary Floating-Point Arithmetic.
6149*----------------------------------------------------------------------------*/
6150
6151static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
6152{
6153 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006154 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
bellard158142c2005-03-13 16:54:06 +00006155 int32 expDiff;
6156 float128 z;
6157
6158 aSig1 = extractFloat128Frac1( a );
6159 aSig0 = extractFloat128Frac0( a );
6160 aExp = extractFloat128Exp( a );
6161 bSig1 = extractFloat128Frac1( b );
6162 bSig0 = extractFloat128Frac0( b );
6163 bExp = extractFloat128Exp( b );
6164 expDiff = aExp - bExp;
6165 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6166 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6167 if ( 0 < expDiff ) goto aExpBigger;
6168 if ( expDiff < 0 ) goto bExpBigger;
6169 if ( aExp == 0x7FFF ) {
6170 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
6171 return propagateFloat128NaN( a, b STATUS_VAR );
6172 }
6173 float_raise( float_flag_invalid STATUS_VAR);
6174 z.low = float128_default_nan_low;
6175 z.high = float128_default_nan_high;
6176 return z;
6177 }
6178 if ( aExp == 0 ) {
6179 aExp = 1;
6180 bExp = 1;
6181 }
6182 if ( bSig0 < aSig0 ) goto aBigger;
6183 if ( aSig0 < bSig0 ) goto bBigger;
6184 if ( bSig1 < aSig1 ) goto aBigger;
6185 if ( aSig1 < bSig1 ) goto bBigger;
6186 return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
6187 bExpBigger:
6188 if ( bExp == 0x7FFF ) {
6189 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6190 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6191 }
6192 if ( aExp == 0 ) {
6193 ++expDiff;
6194 }
6195 else {
6196 aSig0 |= LIT64( 0x4000000000000000 );
6197 }
6198 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6199 bSig0 |= LIT64( 0x4000000000000000 );
6200 bBigger:
6201 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6202 zExp = bExp;
6203 zSign ^= 1;
6204 goto normalizeRoundAndPack;
6205 aExpBigger:
6206 if ( aExp == 0x7FFF ) {
6207 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6208 return a;
6209 }
6210 if ( bExp == 0 ) {
6211 --expDiff;
6212 }
6213 else {
6214 bSig0 |= LIT64( 0x4000000000000000 );
6215 }
6216 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6217 aSig0 |= LIT64( 0x4000000000000000 );
6218 aBigger:
6219 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6220 zExp = aExp;
6221 normalizeRoundAndPack:
6222 --zExp;
6223 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
6224
6225}
6226
6227/*----------------------------------------------------------------------------
6228| Returns the result of adding the quadruple-precision floating-point values
6229| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6230| for Binary Floating-Point Arithmetic.
6231*----------------------------------------------------------------------------*/
6232
6233float128 float128_add( float128 a, float128 b STATUS_PARAM )
6234{
6235 flag aSign, bSign;
6236
6237 aSign = extractFloat128Sign( a );
6238 bSign = extractFloat128Sign( b );
6239 if ( aSign == bSign ) {
6240 return addFloat128Sigs( a, b, aSign STATUS_VAR );
6241 }
6242 else {
6243 return subFloat128Sigs( a, b, aSign STATUS_VAR );
6244 }
6245
6246}
6247
6248/*----------------------------------------------------------------------------
6249| Returns the result of subtracting the quadruple-precision floating-point
6250| values `a' and `b'. The operation is performed according to the IEC/IEEE
6251| Standard for Binary Floating-Point Arithmetic.
6252*----------------------------------------------------------------------------*/
6253
6254float128 float128_sub( float128 a, float128 b STATUS_PARAM )
6255{
6256 flag aSign, bSign;
6257
6258 aSign = extractFloat128Sign( a );
6259 bSign = extractFloat128Sign( b );
6260 if ( aSign == bSign ) {
6261 return subFloat128Sigs( a, b, aSign STATUS_VAR );
6262 }
6263 else {
6264 return addFloat128Sigs( a, b, aSign STATUS_VAR );
6265 }
6266
6267}
6268
6269/*----------------------------------------------------------------------------
6270| Returns the result of multiplying the quadruple-precision floating-point
6271| values `a' and `b'. The operation is performed according to the IEC/IEEE
6272| Standard for Binary Floating-Point Arithmetic.
6273*----------------------------------------------------------------------------*/
6274
6275float128 float128_mul( float128 a, float128 b STATUS_PARAM )
6276{
6277 flag aSign, bSign, zSign;
6278 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006279 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
bellard158142c2005-03-13 16:54:06 +00006280 float128 z;
6281
6282 aSig1 = extractFloat128Frac1( a );
6283 aSig0 = extractFloat128Frac0( a );
6284 aExp = extractFloat128Exp( a );
6285 aSign = extractFloat128Sign( a );
6286 bSig1 = extractFloat128Frac1( b );
6287 bSig0 = extractFloat128Frac0( b );
6288 bExp = extractFloat128Exp( b );
6289 bSign = extractFloat128Sign( b );
6290 zSign = aSign ^ bSign;
6291 if ( aExp == 0x7FFF ) {
6292 if ( ( aSig0 | aSig1 )
6293 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6294 return propagateFloat128NaN( a, b STATUS_VAR );
6295 }
6296 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6297 return packFloat128( zSign, 0x7FFF, 0, 0 );
6298 }
6299 if ( bExp == 0x7FFF ) {
6300 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6301 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6302 invalid:
6303 float_raise( float_flag_invalid STATUS_VAR);
6304 z.low = float128_default_nan_low;
6305 z.high = float128_default_nan_high;
6306 return z;
6307 }
6308 return packFloat128( zSign, 0x7FFF, 0, 0 );
6309 }
6310 if ( aExp == 0 ) {
6311 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6312 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6313 }
6314 if ( bExp == 0 ) {
6315 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6316 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6317 }
6318 zExp = aExp + bExp - 0x4000;
6319 aSig0 |= LIT64( 0x0001000000000000 );
6320 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6321 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6322 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6323 zSig2 |= ( zSig3 != 0 );
6324 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6325 shift128ExtraRightJamming(
6326 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6327 ++zExp;
6328 }
6329 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6330
6331}
6332
6333/*----------------------------------------------------------------------------
6334| Returns the result of dividing the quadruple-precision floating-point value
6335| `a' by the corresponding value `b'. The operation is performed according to
6336| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6337*----------------------------------------------------------------------------*/
6338
6339float128 float128_div( float128 a, float128 b STATUS_PARAM )
6340{
6341 flag aSign, bSign, zSign;
6342 int32 aExp, bExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006343 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6344 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00006345 float128 z;
6346
6347 aSig1 = extractFloat128Frac1( a );
6348 aSig0 = extractFloat128Frac0( a );
6349 aExp = extractFloat128Exp( a );
6350 aSign = extractFloat128Sign( a );
6351 bSig1 = extractFloat128Frac1( b );
6352 bSig0 = extractFloat128Frac0( b );
6353 bExp = extractFloat128Exp( b );
6354 bSign = extractFloat128Sign( b );
6355 zSign = aSign ^ bSign;
6356 if ( aExp == 0x7FFF ) {
6357 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6358 if ( bExp == 0x7FFF ) {
6359 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6360 goto invalid;
6361 }
6362 return packFloat128( zSign, 0x7FFF, 0, 0 );
6363 }
6364 if ( bExp == 0x7FFF ) {
6365 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6366 return packFloat128( zSign, 0, 0, 0 );
6367 }
6368 if ( bExp == 0 ) {
6369 if ( ( bSig0 | bSig1 ) == 0 ) {
6370 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6371 invalid:
6372 float_raise( float_flag_invalid STATUS_VAR);
6373 z.low = float128_default_nan_low;
6374 z.high = float128_default_nan_high;
6375 return z;
6376 }
6377 float_raise( float_flag_divbyzero STATUS_VAR);
6378 return packFloat128( zSign, 0x7FFF, 0, 0 );
6379 }
6380 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6381 }
6382 if ( aExp == 0 ) {
6383 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6384 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6385 }
6386 zExp = aExp - bExp + 0x3FFD;
6387 shortShift128Left(
6388 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6389 shortShift128Left(
6390 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6391 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6392 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6393 ++zExp;
6394 }
6395 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6396 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6397 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006398 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006399 --zSig0;
6400 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6401 }
6402 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6403 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6404 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6405 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006406 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006407 --zSig1;
6408 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6409 }
6410 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6411 }
6412 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
6413 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6414
6415}
6416
6417/*----------------------------------------------------------------------------
6418| Returns the remainder of the quadruple-precision floating-point value `a'
6419| with respect to the corresponding value `b'. The operation is performed
6420| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6421*----------------------------------------------------------------------------*/
6422
6423float128 float128_rem( float128 a, float128 b STATUS_PARAM )
6424{
Blue Swirled086f32010-03-07 13:49:58 +00006425 flag aSign, zSign;
bellard158142c2005-03-13 16:54:06 +00006426 int32 aExp, bExp, expDiff;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006427 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6428 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6429 int64_t sigMean0;
bellard158142c2005-03-13 16:54:06 +00006430 float128 z;
6431
6432 aSig1 = extractFloat128Frac1( a );
6433 aSig0 = extractFloat128Frac0( a );
6434 aExp = extractFloat128Exp( a );
6435 aSign = extractFloat128Sign( a );
6436 bSig1 = extractFloat128Frac1( b );
6437 bSig0 = extractFloat128Frac0( b );
6438 bExp = extractFloat128Exp( b );
bellard158142c2005-03-13 16:54:06 +00006439 if ( aExp == 0x7FFF ) {
6440 if ( ( aSig0 | aSig1 )
6441 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
6442 return propagateFloat128NaN( a, b STATUS_VAR );
6443 }
6444 goto invalid;
6445 }
6446 if ( bExp == 0x7FFF ) {
6447 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
6448 return a;
6449 }
6450 if ( bExp == 0 ) {
6451 if ( ( bSig0 | bSig1 ) == 0 ) {
6452 invalid:
6453 float_raise( float_flag_invalid STATUS_VAR);
6454 z.low = float128_default_nan_low;
6455 z.high = float128_default_nan_high;
6456 return z;
6457 }
6458 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6459 }
6460 if ( aExp == 0 ) {
6461 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6462 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6463 }
6464 expDiff = aExp - bExp;
6465 if ( expDiff < -1 ) return a;
6466 shortShift128Left(
6467 aSig0 | LIT64( 0x0001000000000000 ),
6468 aSig1,
6469 15 - ( expDiff < 0 ),
6470 &aSig0,
6471 &aSig1
6472 );
6473 shortShift128Left(
6474 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6475 q = le128( bSig0, bSig1, aSig0, aSig1 );
6476 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6477 expDiff -= 64;
6478 while ( 0 < expDiff ) {
6479 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6480 q = ( 4 < q ) ? q - 4 : 0;
6481 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6482 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6483 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6484 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6485 expDiff -= 61;
6486 }
6487 if ( -64 < expDiff ) {
6488 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6489 q = ( 4 < q ) ? q - 4 : 0;
6490 q >>= - expDiff;
6491 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6492 expDiff += 52;
6493 if ( expDiff < 0 ) {
6494 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6495 }
6496 else {
6497 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6498 }
6499 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6500 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6501 }
6502 else {
6503 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6504 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6505 }
6506 do {
6507 alternateASig0 = aSig0;
6508 alternateASig1 = aSig1;
6509 ++q;
6510 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006511 } while ( 0 <= (int64_t) aSig0 );
bellard158142c2005-03-13 16:54:06 +00006512 add128(
Andreas Färberbb98fe42011-03-07 01:34:06 +01006513 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
bellard158142c2005-03-13 16:54:06 +00006514 if ( ( sigMean0 < 0 )
6515 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6516 aSig0 = alternateASig0;
6517 aSig1 = alternateASig1;
6518 }
Andreas Färberbb98fe42011-03-07 01:34:06 +01006519 zSign = ( (int64_t) aSig0 < 0 );
bellard158142c2005-03-13 16:54:06 +00006520 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
6521 return
6522 normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
6523
6524}
6525
6526/*----------------------------------------------------------------------------
6527| Returns the square root of the quadruple-precision floating-point value `a'.
6528| The operation is performed according to the IEC/IEEE Standard for Binary
6529| Floating-Point Arithmetic.
6530*----------------------------------------------------------------------------*/
6531
6532float128 float128_sqrt( float128 a STATUS_PARAM )
6533{
6534 flag aSign;
6535 int32 aExp, zExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01006536 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6537 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
bellard158142c2005-03-13 16:54:06 +00006538 float128 z;
6539
6540 aSig1 = extractFloat128Frac1( a );
6541 aSig0 = extractFloat128Frac0( a );
6542 aExp = extractFloat128Exp( a );
6543 aSign = extractFloat128Sign( a );
6544 if ( aExp == 0x7FFF ) {
6545 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
6546 if ( ! aSign ) return a;
6547 goto invalid;
6548 }
6549 if ( aSign ) {
6550 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6551 invalid:
6552 float_raise( float_flag_invalid STATUS_VAR);
6553 z.low = float128_default_nan_low;
6554 z.high = float128_default_nan_high;
6555 return z;
6556 }
6557 if ( aExp == 0 ) {
6558 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6559 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6560 }
6561 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6562 aSig0 |= LIT64( 0x0001000000000000 );
6563 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6564 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6565 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6566 doubleZSig0 = zSig0<<1;
6567 mul64To128( zSig0, zSig0, &term0, &term1 );
6568 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006569 while ( (int64_t) rem0 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006570 --zSig0;
6571 doubleZSig0 -= 2;
6572 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6573 }
6574 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6575 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6576 if ( zSig1 == 0 ) zSig1 = 1;
6577 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6578 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6579 mul64To128( zSig1, zSig1, &term2, &term3 );
6580 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
Andreas Färberbb98fe42011-03-07 01:34:06 +01006581 while ( (int64_t) rem1 < 0 ) {
bellard158142c2005-03-13 16:54:06 +00006582 --zSig1;
6583 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6584 term3 |= 1;
6585 term2 |= doubleZSig0;
6586 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6587 }
6588 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6589 }
6590 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
6591 return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
6592
6593}
6594
6595/*----------------------------------------------------------------------------
6596| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnob6893622011-04-14 00:49:29 +02006597| the corresponding value `b', and 0 otherwise. The invalid exception is
6598| raised if either operand is a NaN. Otherwise, the comparison is performed
bellard158142c2005-03-13 16:54:06 +00006599| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6600*----------------------------------------------------------------------------*/
6601
Aurelien Jarnob6893622011-04-14 00:49:29 +02006602int float128_eq( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006603{
6604
6605 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6606 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6607 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6608 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6609 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006610 float_raise( float_flag_invalid STATUS_VAR);
bellard158142c2005-03-13 16:54:06 +00006611 return 0;
6612 }
6613 return
6614 ( a.low == b.low )
6615 && ( ( a.high == b.high )
6616 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006617 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006618 );
6619
6620}
6621
6622/*----------------------------------------------------------------------------
6623| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006624| or equal to the corresponding value `b', and 0 otherwise. The invalid
6625| exception is raised if either operand is a NaN. The comparison is performed
6626| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006627*----------------------------------------------------------------------------*/
6628
bellard750afe92006-10-28 19:27:11 +00006629int float128_le( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006630{
6631 flag aSign, bSign;
6632
6633 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6634 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6635 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6636 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6637 ) {
6638 float_raise( float_flag_invalid STATUS_VAR);
6639 return 0;
6640 }
6641 aSign = extractFloat128Sign( a );
6642 bSign = extractFloat128Sign( b );
6643 if ( aSign != bSign ) {
6644 return
6645 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006646 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006647 == 0 );
6648 }
6649 return
6650 aSign ? le128( b.high, b.low, a.high, a.low )
6651 : le128( a.high, a.low, b.high, b.low );
6652
6653}
6654
6655/*----------------------------------------------------------------------------
6656| Returns 1 if the quadruple-precision floating-point value `a' is less than
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006657| the corresponding value `b', and 0 otherwise. The invalid exception is
6658| raised if either operand is a NaN. The comparison is performed according
6659| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006660*----------------------------------------------------------------------------*/
6661
bellard750afe92006-10-28 19:27:11 +00006662int float128_lt( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006663{
6664 flag aSign, bSign;
6665
6666 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6667 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6668 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6669 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6670 ) {
6671 float_raise( float_flag_invalid STATUS_VAR);
6672 return 0;
6673 }
6674 aSign = extractFloat128Sign( a );
6675 bSign = extractFloat128Sign( b );
6676 if ( aSign != bSign ) {
6677 return
6678 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006679 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006680 != 0 );
6681 }
6682 return
6683 aSign ? lt128( b.high, b.low, a.high, a.low )
6684 : lt128( a.high, a.low, b.high, b.low );
6685
6686}
6687
6688/*----------------------------------------------------------------------------
Aurelien Jarno67b78612011-04-14 00:49:29 +02006689| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006690| be compared, and 0 otherwise. The invalid exception is raised if either
6691| operand is a NaN. The comparison is performed according to the IEC/IEEE
6692| Standard for Binary Floating-Point Arithmetic.
Aurelien Jarno67b78612011-04-14 00:49:29 +02006693*----------------------------------------------------------------------------*/
6694
6695int float128_unordered( float128 a, float128 b STATUS_PARAM )
6696{
6697 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6698 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6699 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6700 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6701 ) {
6702 float_raise( float_flag_invalid STATUS_VAR);
6703 return 1;
6704 }
6705 return 0;
6706}
6707
6708/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +00006709| Returns 1 if the quadruple-precision floating-point value `a' is equal to
Aurelien Jarnof5a64252011-04-14 00:49:30 +02006710| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6711| exception. The comparison is performed according to the IEC/IEEE Standard
6712| for Binary Floating-Point Arithmetic.
bellard158142c2005-03-13 16:54:06 +00006713*----------------------------------------------------------------------------*/
6714
Aurelien Jarnob6893622011-04-14 00:49:29 +02006715int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006716{
6717
6718 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6719 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6720 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6721 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6722 ) {
Aurelien Jarnob6893622011-04-14 00:49:29 +02006723 if ( float128_is_signaling_nan( a )
6724 || float128_is_signaling_nan( b ) ) {
6725 float_raise( float_flag_invalid STATUS_VAR);
6726 }
bellard158142c2005-03-13 16:54:06 +00006727 return 0;
6728 }
6729 return
6730 ( a.low == b.low )
6731 && ( ( a.high == b.high )
6732 || ( ( a.low == 0 )
Andreas Färberbb98fe42011-03-07 01:34:06 +01006733 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
bellard158142c2005-03-13 16:54:06 +00006734 );
6735
6736}
6737
6738/*----------------------------------------------------------------------------
6739| Returns 1 if the quadruple-precision floating-point value `a' is less than
6740| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6741| cause an exception. Otherwise, the comparison is performed according to the
6742| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6743*----------------------------------------------------------------------------*/
6744
bellard750afe92006-10-28 19:27:11 +00006745int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006746{
6747 flag aSign, bSign;
6748
6749 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6750 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6751 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6752 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6753 ) {
6754 if ( float128_is_signaling_nan( a )
6755 || float128_is_signaling_nan( b ) ) {
6756 float_raise( float_flag_invalid STATUS_VAR);
6757 }
6758 return 0;
6759 }
6760 aSign = extractFloat128Sign( a );
6761 bSign = extractFloat128Sign( b );
6762 if ( aSign != bSign ) {
6763 return
6764 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006765 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006766 == 0 );
6767 }
6768 return
6769 aSign ? le128( b.high, b.low, a.high, a.low )
6770 : le128( a.high, a.low, b.high, b.low );
6771
6772}
6773
6774/*----------------------------------------------------------------------------
6775| Returns 1 if the quadruple-precision floating-point value `a' is less than
6776| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6777| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6778| Standard for Binary Floating-Point Arithmetic.
6779*----------------------------------------------------------------------------*/
6780
bellard750afe92006-10-28 19:27:11 +00006781int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
bellard158142c2005-03-13 16:54:06 +00006782{
6783 flag aSign, bSign;
6784
6785 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6786 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6787 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6788 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6789 ) {
6790 if ( float128_is_signaling_nan( a )
6791 || float128_is_signaling_nan( b ) ) {
6792 float_raise( float_flag_invalid STATUS_VAR);
6793 }
6794 return 0;
6795 }
6796 aSign = extractFloat128Sign( a );
6797 bSign = extractFloat128Sign( b );
6798 if ( aSign != bSign ) {
6799 return
6800 aSign
Andreas Färberbb98fe42011-03-07 01:34:06 +01006801 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
bellard158142c2005-03-13 16:54:06 +00006802 != 0 );
6803 }
6804 return
6805 aSign ? lt128( b.high, b.low, a.high, a.low )
6806 : lt128( a.high, a.low, b.high, b.low );
6807
6808}
6809
Aurelien Jarno67b78612011-04-14 00:49:29 +02006810/*----------------------------------------------------------------------------
6811| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6812| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6813| comparison is performed according to the IEC/IEEE Standard for Binary
6814| Floating-Point Arithmetic.
6815*----------------------------------------------------------------------------*/
6816
6817int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
6818{
6819 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6820 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6821 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6822 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6823 ) {
6824 if ( float128_is_signaling_nan( a )
6825 || float128_is_signaling_nan( b ) ) {
6826 float_raise( float_flag_invalid STATUS_VAR);
6827 }
6828 return 1;
6829 }
6830 return 0;
6831}
6832
bellard1d6bda32005-03-13 18:52:29 +00006833/* misc functions */
Peter Maydellc4850f92014-01-07 17:17:49 +00006834float32 uint32_to_float32(uint32_t a STATUS_PARAM)
bellard1d6bda32005-03-13 18:52:29 +00006835{
6836 return int64_to_float32(a STATUS_VAR);
6837}
6838
Peter Maydellc4850f92014-01-07 17:17:49 +00006839float64 uint32_to_float64(uint32_t a STATUS_PARAM)
bellard1d6bda32005-03-13 18:52:29 +00006840{
6841 return int64_to_float64(a STATUS_VAR);
6842}
6843
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006844uint32 float32_to_uint32( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006845{
6846 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006847 uint32 res;
Peter Maydell34e1c272014-01-07 17:17:49 +00006848 int old_exc_flags = get_float_exception_flags(status);
bellard1d6bda32005-03-13 18:52:29 +00006849
6850 v = float32_to_int64(a STATUS_VAR);
6851 if (v < 0) {
6852 res = 0;
bellard1d6bda32005-03-13 18:52:29 +00006853 } else if (v > 0xffffffff) {
6854 res = 0xffffffff;
bellard1d6bda32005-03-13 18:52:29 +00006855 } else {
Peter Maydell34e1c272014-01-07 17:17:49 +00006856 return v;
bellard1d6bda32005-03-13 18:52:29 +00006857 }
Peter Maydell34e1c272014-01-07 17:17:49 +00006858 set_float_exception_flags(old_exc_flags, status);
6859 float_raise(float_flag_invalid STATUS_VAR);
bellard1d6bda32005-03-13 18:52:29 +00006860 return res;
6861}
6862
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006863uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006864{
6865 int64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006866 uint32 res;
Peter Maydell34e1c272014-01-07 17:17:49 +00006867 int old_exc_flags = get_float_exception_flags(status);
bellard1d6bda32005-03-13 18:52:29 +00006868
6869 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6870 if (v < 0) {
6871 res = 0;
bellard1d6bda32005-03-13 18:52:29 +00006872 } else if (v > 0xffffffff) {
6873 res = 0xffffffff;
bellard1d6bda32005-03-13 18:52:29 +00006874 } else {
Peter Maydell34e1c272014-01-07 17:17:49 +00006875 return v;
bellard1d6bda32005-03-13 18:52:29 +00006876 }
Peter Maydell34e1c272014-01-07 17:17:49 +00006877 set_float_exception_flags(old_exc_flags, status);
6878 float_raise(float_flag_invalid STATUS_VAR);
bellard1d6bda32005-03-13 18:52:29 +00006879 return res;
6880}
6881
Will Newtonf581bf52014-01-07 17:17:48 +00006882int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
6883{
6884 int32_t v;
6885 int_fast16_t res;
6886 int old_exc_flags = get_float_exception_flags(status);
6887
6888 v = float32_to_int32(a STATUS_VAR);
6889 if (v < -0x8000) {
6890 res = -0x8000;
6891 } else if (v > 0x7fff) {
6892 res = 0x7fff;
6893 } else {
6894 return v;
6895 }
6896
6897 set_float_exception_flags(old_exc_flags, status);
6898 float_raise(float_flag_invalid STATUS_VAR);
6899 return res;
6900}
6901
6902uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
6903{
6904 int32_t v;
6905 uint_fast16_t res;
6906 int old_exc_flags = get_float_exception_flags(status);
6907
6908 v = float32_to_int32(a STATUS_VAR);
6909 if (v < 0) {
6910 res = 0;
6911 } else if (v > 0xffff) {
6912 res = 0xffff;
6913 } else {
6914 return v;
6915 }
6916
6917 set_float_exception_flags(old_exc_flags, status);
6918 float_raise(float_flag_invalid STATUS_VAR);
6919 return res;
6920}
6921
Andreas Färber5aea4c52012-04-26 00:15:55 +02006922uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00006923{
6924 int64_t v;
Andreas Färber5aea4c52012-04-26 00:15:55 +02006925 uint_fast16_t res;
Peter Maydell34e1c272014-01-07 17:17:49 +00006926 int old_exc_flags = get_float_exception_flags(status);
Peter Maydellcbcef452010-12-07 15:37:34 +00006927
6928 v = float32_to_int64_round_to_zero(a STATUS_VAR);
6929 if (v < 0) {
6930 res = 0;
Peter Maydellcbcef452010-12-07 15:37:34 +00006931 } else if (v > 0xffff) {
6932 res = 0xffff;
Peter Maydellcbcef452010-12-07 15:37:34 +00006933 } else {
Peter Maydell34e1c272014-01-07 17:17:49 +00006934 return v;
Peter Maydellcbcef452010-12-07 15:37:34 +00006935 }
Peter Maydell34e1c272014-01-07 17:17:49 +00006936 set_float_exception_flags(old_exc_flags, status);
6937 float_raise(float_flag_invalid STATUS_VAR);
Peter Maydellcbcef452010-12-07 15:37:34 +00006938 return res;
6939}
6940
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006941uint32 float64_to_uint32( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006942{
Tom Musta5e7f6542014-01-07 17:17:51 +00006943 uint64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006944 uint32 res;
Tom Musta5e7f6542014-01-07 17:17:51 +00006945 int old_exc_flags = get_float_exception_flags(status);
bellard1d6bda32005-03-13 18:52:29 +00006946
Tom Musta5e7f6542014-01-07 17:17:51 +00006947 v = float64_to_uint64(a STATUS_VAR);
6948 if (v > 0xffffffff) {
bellard1d6bda32005-03-13 18:52:29 +00006949 res = 0xffffffff;
bellard1d6bda32005-03-13 18:52:29 +00006950 } else {
Tom Musta5e7f6542014-01-07 17:17:51 +00006951 return v;
bellard1d6bda32005-03-13 18:52:29 +00006952 }
Tom Musta5e7f6542014-01-07 17:17:51 +00006953 set_float_exception_flags(old_exc_flags, status);
6954 float_raise(float_flag_invalid STATUS_VAR);
bellard1d6bda32005-03-13 18:52:29 +00006955 return res;
6956}
6957
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006958uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
bellard1d6bda32005-03-13 18:52:29 +00006959{
Tom Mustafd728f22014-01-07 17:17:51 +00006960 uint64_t v;
Andreas Färber9f8d2a02011-08-28 20:24:34 +02006961 uint32 res;
Tom Mustafd728f22014-01-07 17:17:51 +00006962 int old_exc_flags = get_float_exception_flags(status);
bellard1d6bda32005-03-13 18:52:29 +00006963
Tom Mustafd728f22014-01-07 17:17:51 +00006964 v = float64_to_uint64_round_to_zero(a STATUS_VAR);
6965 if (v > 0xffffffff) {
bellard1d6bda32005-03-13 18:52:29 +00006966 res = 0xffffffff;
bellard1d6bda32005-03-13 18:52:29 +00006967 } else {
Tom Mustafd728f22014-01-07 17:17:51 +00006968 return v;
bellard1d6bda32005-03-13 18:52:29 +00006969 }
Tom Mustafd728f22014-01-07 17:17:51 +00006970 set_float_exception_flags(old_exc_flags, status);
6971 float_raise(float_flag_invalid STATUS_VAR);
bellard1d6bda32005-03-13 18:52:29 +00006972 return res;
6973}
6974
Will Newtonf581bf52014-01-07 17:17:48 +00006975int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
6976{
6977 int64_t v;
6978 int_fast16_t res;
6979 int old_exc_flags = get_float_exception_flags(status);
6980
6981 v = float64_to_int32(a STATUS_VAR);
6982 if (v < -0x8000) {
6983 res = -0x8000;
6984 } else if (v > 0x7fff) {
6985 res = 0x7fff;
6986 } else {
6987 return v;
6988 }
6989
6990 set_float_exception_flags(old_exc_flags, status);
6991 float_raise(float_flag_invalid STATUS_VAR);
6992 return res;
6993}
6994
6995uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
6996{
6997 int64_t v;
6998 uint_fast16_t res;
6999 int old_exc_flags = get_float_exception_flags(status);
7000
7001 v = float64_to_int32(a STATUS_VAR);
7002 if (v < 0) {
7003 res = 0;
7004 } else if (v > 0xffff) {
7005 res = 0xffff;
7006 } else {
7007 return v;
7008 }
7009
7010 set_float_exception_flags(old_exc_flags, status);
7011 float_raise(float_flag_invalid STATUS_VAR);
7012 return res;
7013}
7014
Andreas Färber5aea4c52012-04-26 00:15:55 +02007015uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
Peter Maydellcbcef452010-12-07 15:37:34 +00007016{
7017 int64_t v;
Andreas Färber5aea4c52012-04-26 00:15:55 +02007018 uint_fast16_t res;
Peter Maydell34e1c272014-01-07 17:17:49 +00007019 int old_exc_flags = get_float_exception_flags(status);
Peter Maydellcbcef452010-12-07 15:37:34 +00007020
7021 v = float64_to_int64_round_to_zero(a STATUS_VAR);
7022 if (v < 0) {
7023 res = 0;
Peter Maydellcbcef452010-12-07 15:37:34 +00007024 } else if (v > 0xffff) {
7025 res = 0xffff;
Peter Maydellcbcef452010-12-07 15:37:34 +00007026 } else {
Peter Maydell34e1c272014-01-07 17:17:49 +00007027 return v;
Peter Maydellcbcef452010-12-07 15:37:34 +00007028 }
Peter Maydell34e1c272014-01-07 17:17:49 +00007029 set_float_exception_flags(old_exc_flags, status);
7030 float_raise(float_flag_invalid STATUS_VAR);
Peter Maydellcbcef452010-12-07 15:37:34 +00007031 return res;
7032}
7033
Tom Mustafb3ea832014-01-07 17:17:49 +00007034/*----------------------------------------------------------------------------
7035| Returns the result of converting the double-precision floating-point value
7036| `a' to the 64-bit unsigned integer format. The conversion is
7037| performed according to the IEC/IEEE Standard for Binary Floating-Point
7038| Arithmetic---which means in particular that the conversion is rounded
7039| according to the current rounding mode. If `a' is a NaN, the largest
7040| positive integer is returned. If the conversion overflows, the
7041| largest unsigned integer is returned. If 'a' is negative, the value is
7042| rounded and zero is returned; negative values that do not round to zero
7043| will raise the inexact exception.
7044*----------------------------------------------------------------------------*/
7045
7046uint64_t float64_to_uint64(float64 a STATUS_PARAM)
j_mayer75d62a52007-03-20 22:10:42 +00007047{
Tom Mustafb3ea832014-01-07 17:17:49 +00007048 flag aSign;
7049 int_fast16_t aExp, shiftCount;
7050 uint64_t aSig, aSigExtra;
7051 a = float64_squash_input_denormal(a STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00007052
Tom Mustafb3ea832014-01-07 17:17:49 +00007053 aSig = extractFloat64Frac(a);
7054 aExp = extractFloat64Exp(a);
7055 aSign = extractFloat64Sign(a);
7056 if (aSign && (aExp > 1022)) {
7057 float_raise(float_flag_invalid STATUS_VAR);
7058 if (float64_is_any_nan(a)) {
7059 return LIT64(0xFFFFFFFFFFFFFFFF);
7060 } else {
7061 return 0;
7062 }
7063 }
7064 if (aExp) {
7065 aSig |= LIT64(0x0010000000000000);
7066 }
7067 shiftCount = 0x433 - aExp;
7068 if (shiftCount <= 0) {
7069 if (0x43E < aExp) {
7070 float_raise(float_flag_invalid STATUS_VAR);
7071 return LIT64(0xFFFFFFFFFFFFFFFF);
7072 }
7073 aSigExtra = 0;
7074 aSig <<= -shiftCount;
7075 } else {
7076 shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
7077 }
7078 return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
j_mayer75d62a52007-03-20 22:10:42 +00007079}
7080
7081uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
7082{
Tom Musta0a87a312014-01-07 17:17:50 +00007083 signed char current_rounding_mode = STATUS(float_rounding_mode);
7084 set_float_rounding_mode(float_round_to_zero STATUS_VAR);
7085 int64_t v = float64_to_uint64(a STATUS_VAR);
7086 set_float_rounding_mode(current_rounding_mode STATUS_VAR);
7087 return v;
j_mayer75d62a52007-03-20 22:10:42 +00007088}
7089
bellard1d6bda32005-03-13 18:52:29 +00007090#define COMPARE(s, nan_exp) \
Luiz Capitulinoa49db982014-06-19 10:13:43 -04007091static inline int float ## s ## _compare_internal( float ## s a, float ## s b, \
bellard1d6bda32005-03-13 18:52:29 +00007092 int is_quiet STATUS_PARAM ) \
7093{ \
7094 flag aSign, bSign; \
Andreas Färberbb98fe42011-03-07 01:34:06 +01007095 uint ## s ## _t av, bv; \
Peter Maydell37d18662011-01-06 19:37:53 +00007096 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
7097 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
bellard1d6bda32005-03-13 18:52:29 +00007098 \
7099 if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
7100 extractFloat ## s ## Frac( a ) ) || \
7101 ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
7102 extractFloat ## s ## Frac( b ) )) { \
7103 if (!is_quiet || \
7104 float ## s ## _is_signaling_nan( a ) || \
7105 float ## s ## _is_signaling_nan( b ) ) { \
7106 float_raise( float_flag_invalid STATUS_VAR); \
7107 } \
7108 return float_relation_unordered; \
7109 } \
7110 aSign = extractFloat ## s ## Sign( a ); \
7111 bSign = extractFloat ## s ## Sign( b ); \
pbrookf090c9d2007-11-18 14:33:24 +00007112 av = float ## s ## _val(a); \
blueswir1cd8a2532007-11-21 18:57:44 +00007113 bv = float ## s ## _val(b); \
bellard1d6bda32005-03-13 18:52:29 +00007114 if ( aSign != bSign ) { \
Andreas Färberbb98fe42011-03-07 01:34:06 +01007115 if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
bellard1d6bda32005-03-13 18:52:29 +00007116 /* zero case */ \
7117 return float_relation_equal; \
7118 } else { \
7119 return 1 - (2 * aSign); \
7120 } \
7121 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00007122 if (av == bv) { \
bellard1d6bda32005-03-13 18:52:29 +00007123 return float_relation_equal; \
7124 } else { \
pbrookf090c9d2007-11-18 14:33:24 +00007125 return 1 - 2 * (aSign ^ ( av < bv )); \
bellard1d6bda32005-03-13 18:52:29 +00007126 } \
7127 } \
7128} \
7129 \
bellard750afe92006-10-28 19:27:11 +00007130int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00007131{ \
7132 return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
7133} \
7134 \
bellard750afe92006-10-28 19:27:11 +00007135int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
bellard1d6bda32005-03-13 18:52:29 +00007136{ \
7137 return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
7138}
7139
7140COMPARE(32, 0xff)
7141COMPARE(64, 0x7ff)
pbrook9ee6e8b2007-11-11 00:04:49 +00007142
Luiz Capitulinoa49db982014-06-19 10:13:43 -04007143static inline int floatx80_compare_internal( floatx80 a, floatx80 b,
Aurelien Jarnof6714d32011-04-20 13:04:22 +02007144 int is_quiet STATUS_PARAM )
7145{
7146 flag aSign, bSign;
7147
7148 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
7149 ( extractFloatx80Frac( a )<<1 ) ) ||
7150 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
7151 ( extractFloatx80Frac( b )<<1 ) )) {
7152 if (!is_quiet ||
7153 floatx80_is_signaling_nan( a ) ||
7154 floatx80_is_signaling_nan( b ) ) {
7155 float_raise( float_flag_invalid STATUS_VAR);
7156 }
7157 return float_relation_unordered;
7158 }
7159 aSign = extractFloatx80Sign( a );
7160 bSign = extractFloatx80Sign( b );
7161 if ( aSign != bSign ) {
7162
7163 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
7164 ( ( a.low | b.low ) == 0 ) ) {
7165 /* zero case */
7166 return float_relation_equal;
7167 } else {
7168 return 1 - (2 * aSign);
7169 }
7170 } else {
7171 if (a.low == b.low && a.high == b.high) {
7172 return float_relation_equal;
7173 } else {
7174 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7175 }
7176 }
7177}
7178
7179int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
7180{
7181 return floatx80_compare_internal(a, b, 0 STATUS_VAR);
7182}
7183
7184int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
7185{
7186 return floatx80_compare_internal(a, b, 1 STATUS_VAR);
7187}
7188
Luiz Capitulinoa49db982014-06-19 10:13:43 -04007189static inline int float128_compare_internal( float128 a, float128 b,
blueswir11f587322007-11-25 18:40:20 +00007190 int is_quiet STATUS_PARAM )
7191{
7192 flag aSign, bSign;
7193
7194 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
7195 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
7196 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
7197 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
7198 if (!is_quiet ||
7199 float128_is_signaling_nan( a ) ||
7200 float128_is_signaling_nan( b ) ) {
7201 float_raise( float_flag_invalid STATUS_VAR);
7202 }
7203 return float_relation_unordered;
7204 }
7205 aSign = extractFloat128Sign( a );
7206 bSign = extractFloat128Sign( b );
7207 if ( aSign != bSign ) {
7208 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
7209 /* zero case */
7210 return float_relation_equal;
7211 } else {
7212 return 1 - (2 * aSign);
7213 }
7214 } else {
7215 if (a.low == b.low && a.high == b.high) {
7216 return float_relation_equal;
7217 } else {
7218 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7219 }
7220 }
7221}
7222
7223int float128_compare( float128 a, float128 b STATUS_PARAM )
7224{
7225 return float128_compare_internal(a, b, 0 STATUS_VAR);
7226}
7227
7228int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
7229{
7230 return float128_compare_internal(a, b, 1 STATUS_VAR);
7231}
7232
Peter Maydell274f1b02011-03-11 08:12:25 +00007233/* min() and max() functions. These can't be implemented as
7234 * 'compare and pick one input' because that would mishandle
7235 * NaNs and +0 vs -0.
Will Newtone17ab312013-12-06 17:01:41 +00007236 *
7237 * minnum() and maxnum() functions. These are similar to the min()
7238 * and max() functions but if one of the arguments is a QNaN and
7239 * the other is numerical then the numerical argument is returned.
7240 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
7241 * and maxNum() operations. min() and max() are the typical min/max
7242 * semantics provided by many CPUs which predate that specification.
Peter Maydell274f1b02011-03-11 08:12:25 +00007243 */
Will Newtone70614e2013-12-06 17:01:41 +00007244#define MINMAX(s) \
Luiz Capitulinoa49db982014-06-19 10:13:43 -04007245static inline float ## s float ## s ## _minmax(float ## s a, float ## s b, \
Will Newtone17ab312013-12-06 17:01:41 +00007246 int ismin, int isieee STATUS_PARAM) \
Peter Maydell274f1b02011-03-11 08:12:25 +00007247{ \
7248 flag aSign, bSign; \
7249 uint ## s ## _t av, bv; \
7250 a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
7251 b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
7252 if (float ## s ## _is_any_nan(a) || \
7253 float ## s ## _is_any_nan(b)) { \
Will Newtone17ab312013-12-06 17:01:41 +00007254 if (isieee) { \
7255 if (float ## s ## _is_quiet_nan(a) && \
7256 !float ## s ##_is_any_nan(b)) { \
7257 return b; \
7258 } else if (float ## s ## _is_quiet_nan(b) && \
7259 !float ## s ## _is_any_nan(a)) { \
7260 return a; \
7261 } \
7262 } \
Peter Maydell274f1b02011-03-11 08:12:25 +00007263 return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
7264 } \
7265 aSign = extractFloat ## s ## Sign(a); \
7266 bSign = extractFloat ## s ## Sign(b); \
7267 av = float ## s ## _val(a); \
7268 bv = float ## s ## _val(b); \
7269 if (aSign != bSign) { \
7270 if (ismin) { \
7271 return aSign ? a : b; \
7272 } else { \
7273 return aSign ? b : a; \
7274 } \
7275 } else { \
7276 if (ismin) { \
7277 return (aSign ^ (av < bv)) ? a : b; \
7278 } else { \
7279 return (aSign ^ (av < bv)) ? b : a; \
7280 } \
7281 } \
7282} \
7283 \
7284float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
7285{ \
Will Newtone17ab312013-12-06 17:01:41 +00007286 return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR); \
Peter Maydell274f1b02011-03-11 08:12:25 +00007287} \
7288 \
7289float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
7290{ \
Will Newtone17ab312013-12-06 17:01:41 +00007291 return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR); \
7292} \
7293 \
7294float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
7295{ \
7296 return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR); \
7297} \
7298 \
7299float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
7300{ \
7301 return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR); \
Peter Maydell274f1b02011-03-11 08:12:25 +00007302}
7303
Will Newtone70614e2013-12-06 17:01:41 +00007304MINMAX(32)
7305MINMAX(64)
Peter Maydell274f1b02011-03-11 08:12:25 +00007306
7307
pbrook9ee6e8b2007-11-11 00:04:49 +00007308/* Multiply A by 2 raised to the power N. */
7309float32 float32_scalbn( float32 a, int n STATUS_PARAM )
7310{
7311 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007312 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01007313 uint32_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00007314
Peter Maydell37d18662011-01-06 19:37:53 +00007315 a = float32_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00007316 aSig = extractFloat32Frac( a );
7317 aExp = extractFloat32Exp( a );
7318 aSign = extractFloat32Sign( a );
7319
7320 if ( aExp == 0xFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007321 if ( aSig ) {
7322 return propagateFloat32NaN( a, a STATUS_VAR );
7323 }
pbrook9ee6e8b2007-11-11 00:04:49 +00007324 return a;
7325 }
Peter Maydell3c85c372014-01-07 17:17:50 +00007326 if (aExp != 0) {
pbrook69397542008-12-19 12:59:28 +00007327 aSig |= 0x00800000;
Peter Maydell3c85c372014-01-07 17:17:50 +00007328 } else if (aSig == 0) {
pbrook69397542008-12-19 12:59:28 +00007329 return a;
Peter Maydell3c85c372014-01-07 17:17:50 +00007330 } else {
7331 aExp++;
7332 }
pbrook69397542008-12-19 12:59:28 +00007333
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007334 if (n > 0x200) {
7335 n = 0x200;
7336 } else if (n < -0x200) {
7337 n = -0x200;
7338 }
7339
pbrook69397542008-12-19 12:59:28 +00007340 aExp += n - 1;
7341 aSig <<= 7;
7342 return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00007343}
7344
7345float64 float64_scalbn( float64 a, int n STATUS_PARAM )
7346{
7347 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007348 int16_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01007349 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00007350
Peter Maydell37d18662011-01-06 19:37:53 +00007351 a = float64_squash_input_denormal(a STATUS_VAR);
pbrook9ee6e8b2007-11-11 00:04:49 +00007352 aSig = extractFloat64Frac( a );
7353 aExp = extractFloat64Exp( a );
7354 aSign = extractFloat64Sign( a );
7355
7356 if ( aExp == 0x7FF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007357 if ( aSig ) {
7358 return propagateFloat64NaN( a, a STATUS_VAR );
7359 }
pbrook9ee6e8b2007-11-11 00:04:49 +00007360 return a;
7361 }
Peter Maydell3c85c372014-01-07 17:17:50 +00007362 if (aExp != 0) {
pbrook69397542008-12-19 12:59:28 +00007363 aSig |= LIT64( 0x0010000000000000 );
Peter Maydell3c85c372014-01-07 17:17:50 +00007364 } else if (aSig == 0) {
pbrook69397542008-12-19 12:59:28 +00007365 return a;
Peter Maydell3c85c372014-01-07 17:17:50 +00007366 } else {
7367 aExp++;
7368 }
pbrook69397542008-12-19 12:59:28 +00007369
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007370 if (n > 0x1000) {
7371 n = 0x1000;
7372 } else if (n < -0x1000) {
7373 n = -0x1000;
7374 }
7375
pbrook69397542008-12-19 12:59:28 +00007376 aExp += n - 1;
7377 aSig <<= 10;
7378 return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00007379}
7380
pbrook9ee6e8b2007-11-11 00:04:49 +00007381floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
7382{
7383 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007384 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01007385 uint64_t aSig;
pbrook9ee6e8b2007-11-11 00:04:49 +00007386
7387 aSig = extractFloatx80Frac( a );
7388 aExp = extractFloatx80Exp( a );
7389 aSign = extractFloatx80Sign( a );
7390
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007391 if ( aExp == 0x7FFF ) {
7392 if ( aSig<<1 ) {
7393 return propagateFloatx80NaN( a, a STATUS_VAR );
7394 }
pbrook9ee6e8b2007-11-11 00:04:49 +00007395 return a;
7396 }
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007397
Peter Maydell3c85c372014-01-07 17:17:50 +00007398 if (aExp == 0) {
7399 if (aSig == 0) {
7400 return a;
7401 }
7402 aExp++;
7403 }
pbrook69397542008-12-19 12:59:28 +00007404
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007405 if (n > 0x10000) {
7406 n = 0x10000;
7407 } else if (n < -0x10000) {
7408 n = -0x10000;
7409 }
7410
pbrook9ee6e8b2007-11-11 00:04:49 +00007411 aExp += n;
pbrook69397542008-12-19 12:59:28 +00007412 return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
7413 aSign, aExp, aSig, 0 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00007414}
pbrook9ee6e8b2007-11-11 00:04:49 +00007415
pbrook9ee6e8b2007-11-11 00:04:49 +00007416float128 float128_scalbn( float128 a, int n STATUS_PARAM )
7417{
7418 flag aSign;
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007419 int32_t aExp;
Andreas Färberbb98fe42011-03-07 01:34:06 +01007420 uint64_t aSig0, aSig1;
pbrook9ee6e8b2007-11-11 00:04:49 +00007421
7422 aSig1 = extractFloat128Frac1( a );
7423 aSig0 = extractFloat128Frac0( a );
7424 aExp = extractFloat128Exp( a );
7425 aSign = extractFloat128Sign( a );
7426 if ( aExp == 0x7FFF ) {
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007427 if ( aSig0 | aSig1 ) {
7428 return propagateFloat128NaN( a, a STATUS_VAR );
7429 }
pbrook9ee6e8b2007-11-11 00:04:49 +00007430 return a;
7431 }
Peter Maydell3c85c372014-01-07 17:17:50 +00007432 if (aExp != 0) {
pbrook69397542008-12-19 12:59:28 +00007433 aSig0 |= LIT64( 0x0001000000000000 );
Peter Maydell3c85c372014-01-07 17:17:50 +00007434 } else if (aSig0 == 0 && aSig1 == 0) {
pbrook69397542008-12-19 12:59:28 +00007435 return a;
Peter Maydell3c85c372014-01-07 17:17:50 +00007436 } else {
7437 aExp++;
7438 }
pbrook69397542008-12-19 12:59:28 +00007439
Aurelien Jarno326b9e92011-04-20 13:04:22 +02007440 if (n > 0x10000) {
7441 n = 0x10000;
7442 } else if (n < -0x10000) {
7443 n = -0x10000;
7444 }
7445
pbrook69397542008-12-19 12:59:28 +00007446 aExp += n - 1;
7447 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
7448 STATUS_VAR );
pbrook9ee6e8b2007-11-11 00:04:49 +00007449
7450}