blob: 5e030cd8e5c06cd699073327e6b0e0a746eb5c25 [file] [log] [blame]
Andreas Färber8d725fa2011-03-07 01:34:04 +01001/*
2 * QEMU float support macros
3 *
Peter Maydell16017c42015-01-12 14:38:28 +00004 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
Andreas Färber8d725fa2011-03-07 01:34:04 +010016 */
bellard158142c2005-03-13 16:54:06 +000017
Peter Maydella7d1ac72015-01-12 14:38:25 +000018/*
19===============================================================================
bellard158142c2005-03-13 16:54:06 +000020This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Peter Maydella7d1ac72015-01-12 14:38:25 +000021Arithmetic Package, Release 2a.
bellard158142c2005-03-13 16:54:06 +000022
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
Peter Maydella7d1ac72015-01-12 14:38:25 +000030is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
bellard158142c2005-03-13 16:54:06 +000031arithmetic/SoftFloat.html'.
32
Peter Maydella7d1ac72015-01-12 14:38:25 +000033THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
bellard158142c2005-03-13 16:54:06 +000038
39Derivative works are acceptable, even for commercial purposes, so long as
Peter Maydella7d1ac72015-01-12 14:38:25 +000040(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
bellard158142c2005-03-13 16:54:06 +000043
Peter Maydella7d1ac72015-01-12 14:38:25 +000044===============================================================================
45*/
bellard158142c2005-03-13 16:54:06 +000046
Peter Maydell16017c42015-01-12 14:38:28 +000047/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
bellard158142c2005-03-13 16:54:06 +000082/*----------------------------------------------------------------------------
Aurelien Jarnob3b4c7f2011-04-14 00:49:29 +020083| This macro tests for minimum version of the GNU C compiler.
84*----------------------------------------------------------------------------*/
85#if defined(__GNUC__) && defined(__GNUC_MINOR__)
86# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
87 ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
88#else
89# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
90#endif
91
92
93/*----------------------------------------------------------------------------
bellard158142c2005-03-13 16:54:06 +000094| Shifts `a' right by the number of bits given in `count'. If any nonzero
95| bits are shifted off, they are ``jammed'' into the least significant bit of
96| the result by setting the least significant bit to 1. The value of `count'
97| can be arbitrarily large; in particular, if `count' is greater than 32, the
98| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
99| The result is stored in the location pointed to by `zPtr'.
100*----------------------------------------------------------------------------*/
101
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400102static inline void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
bellard158142c2005-03-13 16:54:06 +0000103{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100104 uint32_t z;
bellard158142c2005-03-13 16:54:06 +0000105
106 if ( count == 0 ) {
107 z = a;
108 }
109 else if ( count < 32 ) {
110 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
111 }
112 else {
113 z = ( a != 0 );
114 }
115 *zPtr = z;
116
117}
118
119/*----------------------------------------------------------------------------
120| Shifts `a' right by the number of bits given in `count'. If any nonzero
121| bits are shifted off, they are ``jammed'' into the least significant bit of
122| the result by setting the least significant bit to 1. The value of `count'
123| can be arbitrarily large; in particular, if `count' is greater than 64, the
124| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
125| The result is stored in the location pointed to by `zPtr'.
126*----------------------------------------------------------------------------*/
127
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400128static inline void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
bellard158142c2005-03-13 16:54:06 +0000129{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100130 uint64_t z;
bellard158142c2005-03-13 16:54:06 +0000131
132 if ( count == 0 ) {
133 z = a;
134 }
135 else if ( count < 64 ) {
136 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
137 }
138 else {
139 z = ( a != 0 );
140 }
141 *zPtr = z;
142
143}
144
145/*----------------------------------------------------------------------------
146| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
147| _plus_ the number of bits given in `count'. The shifted result is at most
148| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
149| bits shifted off form a second 64-bit result as follows: The _last_ bit
150| shifted off is the most-significant bit of the extra result, and the other
151| 63 bits of the extra result are all zero if and only if _all_but_the_last_
152| bits shifted off were all zero. This extra result is stored in the location
153| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
Peter Maydella7d1ac72015-01-12 14:38:25 +0000154| (This routine makes more sense if `a0' and `a1' are considered to form a
155| fixed-point value with binary point between `a0' and `a1'. This fixed-point
156| value is shifted right by the number of bits given in `count', and the
157| integer part of the result is returned at the location pointed to by
bellard158142c2005-03-13 16:54:06 +0000158| `z0Ptr'. The fractional part of the result may be slightly corrupted as
159| described above, and is returned at the location pointed to by `z1Ptr'.)
160*----------------------------------------------------------------------------*/
161
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400162static inline void
bellard158142c2005-03-13 16:54:06 +0000163 shift64ExtraRightJamming(
Andreas Färber94a49d82012-04-26 00:15:56 +0200164 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
bellard158142c2005-03-13 16:54:06 +0000165{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100166 uint64_t z0, z1;
bellard158142c2005-03-13 16:54:06 +0000167 int8 negCount = ( - count ) & 63;
168
169 if ( count == 0 ) {
170 z1 = a1;
171 z0 = a0;
172 }
173 else if ( count < 64 ) {
174 z1 = ( a0<<negCount ) | ( a1 != 0 );
175 z0 = a0>>count;
176 }
177 else {
178 if ( count == 64 ) {
179 z1 = a0 | ( a1 != 0 );
180 }
181 else {
182 z1 = ( ( a0 | a1 ) != 0 );
183 }
184 z0 = 0;
185 }
186 *z1Ptr = z1;
187 *z0Ptr = z0;
188
189}
190
191/*----------------------------------------------------------------------------
192| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
193| number of bits given in `count'. Any bits shifted off are lost. The value
194| of `count' can be arbitrarily large; in particular, if `count' is greater
195| than 128, the result will be 0. The result is broken into two 64-bit pieces
196| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
197*----------------------------------------------------------------------------*/
198
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400199static inline void
bellard158142c2005-03-13 16:54:06 +0000200 shift128Right(
Andreas Färber94a49d82012-04-26 00:15:56 +0200201 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
bellard158142c2005-03-13 16:54:06 +0000202{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100203 uint64_t z0, z1;
bellard158142c2005-03-13 16:54:06 +0000204 int8 negCount = ( - count ) & 63;
205
206 if ( count == 0 ) {
207 z1 = a1;
208 z0 = a0;
209 }
210 else if ( count < 64 ) {
211 z1 = ( a0<<negCount ) | ( a1>>count );
212 z0 = a0>>count;
213 }
214 else {
Peter Maydell40397362013-06-02 16:17:49 +0100215 z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
bellard158142c2005-03-13 16:54:06 +0000216 z0 = 0;
217 }
218 *z1Ptr = z1;
219 *z0Ptr = z0;
220
221}
222
223/*----------------------------------------------------------------------------
224| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
225| number of bits given in `count'. If any nonzero bits are shifted off, they
226| are ``jammed'' into the least significant bit of the result by setting the
227| least significant bit to 1. The value of `count' can be arbitrarily large;
228| in particular, if `count' is greater than 128, the result will be either
229| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
230| nonzero. The result is broken into two 64-bit pieces which are stored at
231| the locations pointed to by `z0Ptr' and `z1Ptr'.
232*----------------------------------------------------------------------------*/
233
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400234static inline void
bellard158142c2005-03-13 16:54:06 +0000235 shift128RightJamming(
Andreas Färber94a49d82012-04-26 00:15:56 +0200236 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
bellard158142c2005-03-13 16:54:06 +0000237{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100238 uint64_t z0, z1;
bellard158142c2005-03-13 16:54:06 +0000239 int8 negCount = ( - count ) & 63;
240
241 if ( count == 0 ) {
242 z1 = a1;
243 z0 = a0;
244 }
245 else if ( count < 64 ) {
246 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
247 z0 = a0>>count;
248 }
249 else {
250 if ( count == 64 ) {
251 z1 = a0 | ( a1 != 0 );
252 }
253 else if ( count < 128 ) {
254 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
255 }
256 else {
257 z1 = ( ( a0 | a1 ) != 0 );
258 }
259 z0 = 0;
260 }
261 *z1Ptr = z1;
262 *z0Ptr = z0;
263
264}
265
266/*----------------------------------------------------------------------------
267| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
268| by 64 _plus_ the number of bits given in `count'. The shifted result is
269| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
270| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
271| off form a third 64-bit result as follows: The _last_ bit shifted off is
272| the most-significant bit of the extra result, and the other 63 bits of the
273| extra result are all zero if and only if _all_but_the_last_ bits shifted off
274| were all zero. This extra result is stored in the location pointed to by
275| `z2Ptr'. The value of `count' can be arbitrarily large.
276| (This routine makes more sense if `a0', `a1', and `a2' are considered
277| to form a fixed-point value with binary point between `a1' and `a2'. This
278| fixed-point value is shifted right by the number of bits given in `count',
279| and the integer part of the result is returned at the locations pointed to
280| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
281| corrupted as described above, and is returned at the location pointed to by
282| `z2Ptr'.)
283*----------------------------------------------------------------------------*/
284
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400285static inline void
bellard158142c2005-03-13 16:54:06 +0000286 shift128ExtraRightJamming(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100287 uint64_t a0,
288 uint64_t a1,
289 uint64_t a2,
Andreas Färber94a49d82012-04-26 00:15:56 +0200290 int_fast16_t count,
Andreas Färberbb98fe42011-03-07 01:34:06 +0100291 uint64_t *z0Ptr,
292 uint64_t *z1Ptr,
293 uint64_t *z2Ptr
bellard158142c2005-03-13 16:54:06 +0000294 )
295{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100296 uint64_t z0, z1, z2;
bellard158142c2005-03-13 16:54:06 +0000297 int8 negCount = ( - count ) & 63;
298
299 if ( count == 0 ) {
300 z2 = a2;
301 z1 = a1;
302 z0 = a0;
303 }
304 else {
305 if ( count < 64 ) {
306 z2 = a1<<negCount;
307 z1 = ( a0<<negCount ) | ( a1>>count );
308 z0 = a0>>count;
309 }
310 else {
311 if ( count == 64 ) {
312 z2 = a1;
313 z1 = a0;
314 }
315 else {
316 a2 |= a1;
317 if ( count < 128 ) {
318 z2 = a0<<negCount;
319 z1 = a0>>( count & 63 );
320 }
321 else {
322 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
323 z1 = 0;
324 }
325 }
326 z0 = 0;
327 }
328 z2 |= ( a2 != 0 );
329 }
330 *z2Ptr = z2;
331 *z1Ptr = z1;
332 *z0Ptr = z0;
333
334}
335
336/*----------------------------------------------------------------------------
337| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
338| number of bits given in `count'. Any bits shifted off are lost. The value
339| of `count' must be less than 64. The result is broken into two 64-bit
340| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
341*----------------------------------------------------------------------------*/
342
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400343static inline void
bellard158142c2005-03-13 16:54:06 +0000344 shortShift128Left(
Andreas Färber94a49d82012-04-26 00:15:56 +0200345 uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
bellard158142c2005-03-13 16:54:06 +0000346{
347
348 *z1Ptr = a1<<count;
349 *z0Ptr =
350 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
351
352}
353
354/*----------------------------------------------------------------------------
355| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
356| by the number of bits given in `count'. Any bits shifted off are lost.
357| The value of `count' must be less than 64. The result is broken into three
358| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
359| `z1Ptr', and `z2Ptr'.
360*----------------------------------------------------------------------------*/
361
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400362static inline void
bellard158142c2005-03-13 16:54:06 +0000363 shortShift192Left(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100364 uint64_t a0,
365 uint64_t a1,
366 uint64_t a2,
Andreas Färber94a49d82012-04-26 00:15:56 +0200367 int_fast16_t count,
Andreas Färberbb98fe42011-03-07 01:34:06 +0100368 uint64_t *z0Ptr,
369 uint64_t *z1Ptr,
370 uint64_t *z2Ptr
bellard158142c2005-03-13 16:54:06 +0000371 )
372{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100373 uint64_t z0, z1, z2;
bellard158142c2005-03-13 16:54:06 +0000374 int8 negCount;
375
376 z2 = a2<<count;
377 z1 = a1<<count;
378 z0 = a0<<count;
379 if ( 0 < count ) {
380 negCount = ( ( - count ) & 63 );
381 z1 |= a2>>negCount;
382 z0 |= a1>>negCount;
383 }
384 *z2Ptr = z2;
385 *z1Ptr = z1;
386 *z0Ptr = z0;
387
388}
389
390/*----------------------------------------------------------------------------
391| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
392| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
393| any carry out is lost. The result is broken into two 64-bit pieces which
394| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
395*----------------------------------------------------------------------------*/
396
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400397static inline void
bellard158142c2005-03-13 16:54:06 +0000398 add128(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100399 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
bellard158142c2005-03-13 16:54:06 +0000400{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100401 uint64_t z1;
bellard158142c2005-03-13 16:54:06 +0000402
403 z1 = a1 + b1;
404 *z1Ptr = z1;
405 *z0Ptr = a0 + b0 + ( z1 < a1 );
406
407}
408
409/*----------------------------------------------------------------------------
410| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
411| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
412| modulo 2^192, so any carry out is lost. The result is broken into three
413| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
414| `z1Ptr', and `z2Ptr'.
415*----------------------------------------------------------------------------*/
416
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400417static inline void
bellard158142c2005-03-13 16:54:06 +0000418 add192(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100419 uint64_t a0,
420 uint64_t a1,
421 uint64_t a2,
422 uint64_t b0,
423 uint64_t b1,
424 uint64_t b2,
425 uint64_t *z0Ptr,
426 uint64_t *z1Ptr,
427 uint64_t *z2Ptr
bellard158142c2005-03-13 16:54:06 +0000428 )
429{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100430 uint64_t z0, z1, z2;
bellard158142c2005-03-13 16:54:06 +0000431 int8 carry0, carry1;
432
433 z2 = a2 + b2;
434 carry1 = ( z2 < a2 );
435 z1 = a1 + b1;
436 carry0 = ( z1 < a1 );
437 z0 = a0 + b0;
438 z1 += carry1;
439 z0 += ( z1 < carry1 );
440 z0 += carry0;
441 *z2Ptr = z2;
442 *z1Ptr = z1;
443 *z0Ptr = z0;
444
445}
446
447/*----------------------------------------------------------------------------
448| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
449| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
450| 2^128, so any borrow out (carry out) is lost. The result is broken into two
451| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
452| `z1Ptr'.
453*----------------------------------------------------------------------------*/
454
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400455static inline void
bellard158142c2005-03-13 16:54:06 +0000456 sub128(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100457 uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
bellard158142c2005-03-13 16:54:06 +0000458{
459
460 *z1Ptr = a1 - b1;
461 *z0Ptr = a0 - b0 - ( a1 < b1 );
462
463}
464
465/*----------------------------------------------------------------------------
466| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
467| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
468| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
469| result is broken into three 64-bit pieces which are stored at the locations
470| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
471*----------------------------------------------------------------------------*/
472
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400473static inline void
bellard158142c2005-03-13 16:54:06 +0000474 sub192(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100475 uint64_t a0,
476 uint64_t a1,
477 uint64_t a2,
478 uint64_t b0,
479 uint64_t b1,
480 uint64_t b2,
481 uint64_t *z0Ptr,
482 uint64_t *z1Ptr,
483 uint64_t *z2Ptr
bellard158142c2005-03-13 16:54:06 +0000484 )
485{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100486 uint64_t z0, z1, z2;
bellard158142c2005-03-13 16:54:06 +0000487 int8 borrow0, borrow1;
488
489 z2 = a2 - b2;
490 borrow1 = ( a2 < b2 );
491 z1 = a1 - b1;
492 borrow0 = ( a1 < b1 );
493 z0 = a0 - b0;
494 z0 -= ( z1 < borrow1 );
495 z1 -= borrow1;
496 z0 -= borrow0;
497 *z2Ptr = z2;
498 *z1Ptr = z1;
499 *z0Ptr = z0;
500
501}
502
503/*----------------------------------------------------------------------------
504| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
505| into two 64-bit pieces which are stored at the locations pointed to by
506| `z0Ptr' and `z1Ptr'.
507*----------------------------------------------------------------------------*/
508
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400509static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
bellard158142c2005-03-13 16:54:06 +0000510{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100511 uint32_t aHigh, aLow, bHigh, bLow;
512 uint64_t z0, zMiddleA, zMiddleB, z1;
bellard158142c2005-03-13 16:54:06 +0000513
514 aLow = a;
515 aHigh = a>>32;
516 bLow = b;
517 bHigh = b>>32;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100518 z1 = ( (uint64_t) aLow ) * bLow;
519 zMiddleA = ( (uint64_t) aLow ) * bHigh;
520 zMiddleB = ( (uint64_t) aHigh ) * bLow;
521 z0 = ( (uint64_t) aHigh ) * bHigh;
bellard158142c2005-03-13 16:54:06 +0000522 zMiddleA += zMiddleB;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100523 z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
bellard158142c2005-03-13 16:54:06 +0000524 zMiddleA <<= 32;
525 z1 += zMiddleA;
526 z0 += ( z1 < zMiddleA );
527 *z1Ptr = z1;
528 *z0Ptr = z0;
529
530}
531
532/*----------------------------------------------------------------------------
533| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
534| `b' to obtain a 192-bit product. The product is broken into three 64-bit
535| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
536| `z2Ptr'.
537*----------------------------------------------------------------------------*/
538
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400539static inline void
bellard158142c2005-03-13 16:54:06 +0000540 mul128By64To192(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100541 uint64_t a0,
542 uint64_t a1,
543 uint64_t b,
544 uint64_t *z0Ptr,
545 uint64_t *z1Ptr,
546 uint64_t *z2Ptr
bellard158142c2005-03-13 16:54:06 +0000547 )
548{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100549 uint64_t z0, z1, z2, more1;
bellard158142c2005-03-13 16:54:06 +0000550
551 mul64To128( a1, b, &z1, &z2 );
552 mul64To128( a0, b, &z0, &more1 );
553 add128( z0, more1, 0, z1, &z0, &z1 );
554 *z2Ptr = z2;
555 *z1Ptr = z1;
556 *z0Ptr = z0;
557
558}
559
560/*----------------------------------------------------------------------------
561| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
562| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
563| product. The product is broken into four 64-bit pieces which are stored at
564| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
565*----------------------------------------------------------------------------*/
566
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400567static inline void
bellard158142c2005-03-13 16:54:06 +0000568 mul128To256(
Andreas Färberbb98fe42011-03-07 01:34:06 +0100569 uint64_t a0,
570 uint64_t a1,
571 uint64_t b0,
572 uint64_t b1,
573 uint64_t *z0Ptr,
574 uint64_t *z1Ptr,
575 uint64_t *z2Ptr,
576 uint64_t *z3Ptr
bellard158142c2005-03-13 16:54:06 +0000577 )
578{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100579 uint64_t z0, z1, z2, z3;
580 uint64_t more1, more2;
bellard158142c2005-03-13 16:54:06 +0000581
582 mul64To128( a1, b1, &z2, &z3 );
583 mul64To128( a1, b0, &z1, &more2 );
584 add128( z1, more2, 0, z2, &z1, &z2 );
585 mul64To128( a0, b0, &z0, &more1 );
586 add128( z0, more1, 0, z1, &z0, &z1 );
587 mul64To128( a0, b1, &more1, &more2 );
588 add128( more1, more2, 0, z2, &more1, &z2 );
589 add128( z0, z1, 0, more1, &z0, &z1 );
590 *z3Ptr = z3;
591 *z2Ptr = z2;
592 *z1Ptr = z1;
593 *z0Ptr = z0;
594
595}
596
597/*----------------------------------------------------------------------------
598| Returns an approximation to the 64-bit integer quotient obtained by dividing
599| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
600| divisor `b' must be at least 2^63. If q is the exact quotient truncated
601| toward zero, the approximation returned lies between q and q + 2 inclusive.
602| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
603| unsigned integer is returned.
604*----------------------------------------------------------------------------*/
605
Andreas Färberbb98fe42011-03-07 01:34:06 +0100606static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
bellard158142c2005-03-13 16:54:06 +0000607{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100608 uint64_t b0, b1;
609 uint64_t rem0, rem1, term0, term1;
610 uint64_t z;
bellard158142c2005-03-13 16:54:06 +0000611
612 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
613 b0 = b>>32;
614 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
615 mul64To128( b, z, &term0, &term1 );
616 sub128( a0, a1, term0, term1, &rem0, &rem1 );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100617 while ( ( (int64_t) rem0 ) < 0 ) {
bellard158142c2005-03-13 16:54:06 +0000618 z -= LIT64( 0x100000000 );
619 b1 = b<<32;
620 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
621 }
622 rem0 = ( rem0<<32 ) | ( rem1>>32 );
623 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
624 return z;
625
626}
627
628/*----------------------------------------------------------------------------
629| Returns an approximation to the square root of the 32-bit significand given
630| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
631| `aExp' (the least significant bit) is 1, the integer returned approximates
632| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
633| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
634| case, the approximation returned lies strictly within +/-2 of the exact
635| value.
636*----------------------------------------------------------------------------*/
637
Andreas Färber94a49d82012-04-26 00:15:56 +0200638static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a)
bellard158142c2005-03-13 16:54:06 +0000639{
Andreas Färberbb98fe42011-03-07 01:34:06 +0100640 static const uint16_t sqrtOddAdjustments[] = {
bellard158142c2005-03-13 16:54:06 +0000641 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
642 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
643 };
Andreas Färberbb98fe42011-03-07 01:34:06 +0100644 static const uint16_t sqrtEvenAdjustments[] = {
bellard158142c2005-03-13 16:54:06 +0000645 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
646 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
647 };
648 int8 index;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100649 uint32_t z;
bellard158142c2005-03-13 16:54:06 +0000650
651 index = ( a>>27 ) & 15;
652 if ( aExp & 1 ) {
blueswir13f4cb3d2009-04-13 16:31:01 +0000653 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
bellard158142c2005-03-13 16:54:06 +0000654 z = ( ( a / z )<<14 ) + ( z<<15 );
655 a >>= 1;
656 }
657 else {
blueswir13f4cb3d2009-04-13 16:31:01 +0000658 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
bellard158142c2005-03-13 16:54:06 +0000659 z = a / z + z;
660 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
Andreas Färberbb98fe42011-03-07 01:34:06 +0100661 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
bellard158142c2005-03-13 16:54:06 +0000662 }
Andreas Färberbb98fe42011-03-07 01:34:06 +0100663 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
bellard158142c2005-03-13 16:54:06 +0000664
665}
666
667/*----------------------------------------------------------------------------
668| Returns the number of leading 0 bits before the most-significant 1 bit of
669| `a'. If `a' is zero, 32 is returned.
670*----------------------------------------------------------------------------*/
671
Andreas Färberbb98fe42011-03-07 01:34:06 +0100672static int8 countLeadingZeros32( uint32_t a )
bellard158142c2005-03-13 16:54:06 +0000673{
Aurelien Jarnob3b4c7f2011-04-14 00:49:29 +0200674#if SOFTFLOAT_GNUC_PREREQ(3, 4)
675 if (a) {
676 return __builtin_clz(a);
677 } else {
678 return 32;
679 }
680#else
bellard158142c2005-03-13 16:54:06 +0000681 static const int8 countLeadingZerosHigh[] = {
682 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
683 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
684 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
685 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
686 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
687 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
688 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
689 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
690 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
694 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
695 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
696 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
697 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
698 };
699 int8 shiftCount;
700
701 shiftCount = 0;
702 if ( a < 0x10000 ) {
703 shiftCount += 16;
704 a <<= 16;
705 }
706 if ( a < 0x1000000 ) {
707 shiftCount += 8;
708 a <<= 8;
709 }
710 shiftCount += countLeadingZerosHigh[ a>>24 ];
711 return shiftCount;
Aurelien Jarnob3b4c7f2011-04-14 00:49:29 +0200712#endif
bellard158142c2005-03-13 16:54:06 +0000713}
714
715/*----------------------------------------------------------------------------
716| Returns the number of leading 0 bits before the most-significant 1 bit of
717| `a'. If `a' is zero, 64 is returned.
718*----------------------------------------------------------------------------*/
719
Andreas Färberbb98fe42011-03-07 01:34:06 +0100720static int8 countLeadingZeros64( uint64_t a )
bellard158142c2005-03-13 16:54:06 +0000721{
Aurelien Jarnob3b4c7f2011-04-14 00:49:29 +0200722#if SOFTFLOAT_GNUC_PREREQ(3, 4)
723 if (a) {
724 return __builtin_clzll(a);
725 } else {
726 return 64;
727 }
728#else
bellard158142c2005-03-13 16:54:06 +0000729 int8 shiftCount;
730
731 shiftCount = 0;
Andreas Färberbb98fe42011-03-07 01:34:06 +0100732 if ( a < ( (uint64_t) 1 )<<32 ) {
bellard158142c2005-03-13 16:54:06 +0000733 shiftCount += 32;
734 }
735 else {
736 a >>= 32;
737 }
738 shiftCount += countLeadingZeros32( a );
739 return shiftCount;
Aurelien Jarnob3b4c7f2011-04-14 00:49:29 +0200740#endif
bellard158142c2005-03-13 16:54:06 +0000741}
742
743/*----------------------------------------------------------------------------
744| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
745| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
746| Otherwise, returns 0.
747*----------------------------------------------------------------------------*/
748
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400749static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
bellard158142c2005-03-13 16:54:06 +0000750{
751
752 return ( a0 == b0 ) && ( a1 == b1 );
753
754}
755
756/*----------------------------------------------------------------------------
757| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
758| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
759| Otherwise, returns 0.
760*----------------------------------------------------------------------------*/
761
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400762static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
bellard158142c2005-03-13 16:54:06 +0000763{
764
765 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
766
767}
768
769/*----------------------------------------------------------------------------
770| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
771| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
772| returns 0.
773*----------------------------------------------------------------------------*/
774
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400775static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
bellard158142c2005-03-13 16:54:06 +0000776{
777
778 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
779
780}
781
782/*----------------------------------------------------------------------------
783| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
784| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
785| Otherwise, returns 0.
786*----------------------------------------------------------------------------*/
787
Luiz Capitulinoa49db982014-06-19 10:13:43 -0400788static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
bellard158142c2005-03-13 16:54:06 +0000789{
790
791 return ( a0 != b0 ) || ( a1 != b1 );
792
793}