blob: b45a5e8690074b94ec1e22568b63b88dde724ace [file] [log] [blame]
Laurent Vivier591596b2018-02-24 21:18:00 +01001/*
2 * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator,
3 * derived from NetBSD M68040 FPSP functions,
4 * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic
5 * Package. Those parts of the code (and some later contributions) are
6 * provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file will be taken to be licensed under
14 * the Softfloat-2a license unless specifically indicated otherwise.
15 */
16
17/* Portions of this work are licensed under the terms of the GNU GPL,
18 * version 2 or later. See the COPYING file in the top-level directory.
19 */
20
21#include "qemu/osdep.h"
22#include "softfloat.h"
23#include "fpu/softfloat-macros.h"
Laurent Vivier4b5c65b2018-03-05 21:39:04 +010024#include "softfloat_fpsp_tables.h"
Laurent Vivier591596b2018-02-24 21:18:00 +010025
Laurent Vivierc84813b2018-03-12 21:27:24 +010026#define pi_exp 0x4000
Laurent Vivier8c992ab2018-03-12 21:27:22 +010027#define piby2_exp 0x3FFF
28#define pi_sig LIT64(0xc90fdaa22168c235)
29
Laurent Vivier0d379c12018-02-24 21:18:02 +010030static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status)
31{
32 if (floatx80_is_signaling_nan(a, status)) {
33 float_raise(float_flag_invalid, status);
Richard Henderson1c0c9512018-05-10 13:49:00 -070034 a = floatx80_silence_nan(a, status);
Laurent Vivier0d379c12018-02-24 21:18:02 +010035 }
36
37 if (status->default_nan_mode) {
38 return floatx80_default_nan(status);
39 }
40
Richard Henderson1c0c9512018-05-10 13:49:00 -070041 return a;
Laurent Vivier0d379c12018-02-24 21:18:02 +010042}
43
Laurent Vivier591596b2018-02-24 21:18:00 +010044/*----------------------------------------------------------------------------
45 | Returns the modulo remainder of the extended double-precision floating-point
46 | value `a' with respect to the corresponding value `b'.
47 *----------------------------------------------------------------------------*/
48
49floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
50{
51 flag aSign, zSign;
52 int32_t aExp, bExp, expDiff;
53 uint64_t aSig0, aSig1, bSig;
54 uint64_t qTemp, term0, term1;
55
56 aSig0 = extractFloatx80Frac(a);
57 aExp = extractFloatx80Exp(a);
58 aSign = extractFloatx80Sign(a);
59 bSig = extractFloatx80Frac(b);
60 bExp = extractFloatx80Exp(b);
61
62 if (aExp == 0x7FFF) {
63 if ((uint64_t) (aSig0 << 1)
64 || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
65 return propagateFloatx80NaN(a, b, status);
66 }
67 goto invalid;
68 }
69 if (bExp == 0x7FFF) {
70 if ((uint64_t) (bSig << 1)) {
71 return propagateFloatx80NaN(a, b, status);
72 }
73 return a;
74 }
75 if (bExp == 0) {
76 if (bSig == 0) {
77 invalid:
78 float_raise(float_flag_invalid, status);
79 return floatx80_default_nan(status);
80 }
81 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
82 }
83 if (aExp == 0) {
84 if ((uint64_t) (aSig0 << 1) == 0) {
85 return a;
86 }
87 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
88 }
89 bSig |= LIT64(0x8000000000000000);
90 zSign = aSign;
91 expDiff = aExp - bExp;
92 aSig1 = 0;
93 if (expDiff < 0) {
94 return a;
95 }
96 qTemp = (bSig <= aSig0);
97 if (qTemp) {
98 aSig0 -= bSig;
99 }
100 expDiff -= 64;
101 while (0 < expDiff) {
102 qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
103 qTemp = (2 < qTemp) ? qTemp - 2 : 0;
104 mul64To128(bSig, qTemp, &term0, &term1);
105 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
106 shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
Laurent Vivier5a73e7f2018-05-08 22:39:37 +0200107 expDiff -= 62;
Laurent Vivier591596b2018-02-24 21:18:00 +0100108 }
109 expDiff += 64;
110 if (0 < expDiff) {
111 qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
112 qTemp = (2 < qTemp) ? qTemp - 2 : 0;
113 qTemp >>= 64 - expDiff;
114 mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
115 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
116 shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
117 while (le128(term0, term1, aSig0, aSig1)) {
118 ++qTemp;
119 sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
120 }
121 }
122 return
123 normalizeRoundAndPackFloatx80(
124 80, zSign, bExp + expDiff, aSig0, aSig1, status);
125}
Laurent Vivier0d379c12018-02-24 21:18:02 +0100126
127/*----------------------------------------------------------------------------
128 | Returns the mantissa of the extended double-precision floating-point
129 | value `a'.
130 *----------------------------------------------------------------------------*/
131
132floatx80 floatx80_getman(floatx80 a, float_status *status)
133{
134 flag aSign;
135 int32_t aExp;
136 uint64_t aSig;
137
138 aSig = extractFloatx80Frac(a);
139 aExp = extractFloatx80Exp(a);
140 aSign = extractFloatx80Sign(a);
141
142 if (aExp == 0x7FFF) {
143 if ((uint64_t) (aSig << 1)) {
144 return propagateFloatx80NaNOneArg(a , status);
145 }
146 float_raise(float_flag_invalid , status);
147 return floatx80_default_nan(status);
148 }
149
150 if (aExp == 0) {
151 if (aSig == 0) {
152 return packFloatx80(aSign, 0, 0);
153 }
154 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
155 }
156
157 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
158 0x3FFF, aSig, 0, status);
159}
160
161/*----------------------------------------------------------------------------
162 | Returns the exponent of the extended double-precision floating-point
163 | value `a' as an extended double-precision value.
164 *----------------------------------------------------------------------------*/
165
166floatx80 floatx80_getexp(floatx80 a, float_status *status)
167{
168 flag aSign;
169 int32_t aExp;
170 uint64_t aSig;
171
172 aSig = extractFloatx80Frac(a);
173 aExp = extractFloatx80Exp(a);
174 aSign = extractFloatx80Sign(a);
175
176 if (aExp == 0x7FFF) {
177 if ((uint64_t) (aSig << 1)) {
178 return propagateFloatx80NaNOneArg(a , status);
179 }
180 float_raise(float_flag_invalid , status);
181 return floatx80_default_nan(status);
182 }
183
184 if (aExp == 0) {
185 if (aSig == 0) {
186 return packFloatx80(aSign, 0, 0);
187 }
188 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
189 }
190
191 return int32_to_floatx80(aExp - 0x3FFF, status);
192}
193
194/*----------------------------------------------------------------------------
195 | Scales extended double-precision floating-point value in operand `a' by
196 | value `b'. The function truncates the value in the second operand 'b' to
197 | an integral value and adds that value to the exponent of the operand 'a'.
198 | The operation performed according to the IEC/IEEE Standard for Binary
199 | Floating-Point Arithmetic.
200 *----------------------------------------------------------------------------*/
201
202floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
203{
204 flag aSign, bSign;
205 int32_t aExp, bExp, shiftCount;
206 uint64_t aSig, bSig;
207
208 aSig = extractFloatx80Frac(a);
209 aExp = extractFloatx80Exp(a);
210 aSign = extractFloatx80Sign(a);
211 bSig = extractFloatx80Frac(b);
212 bExp = extractFloatx80Exp(b);
213 bSign = extractFloatx80Sign(b);
214
215 if (bExp == 0x7FFF) {
216 if ((uint64_t) (bSig << 1) ||
217 ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
218 return propagateFloatx80NaN(a, b, status);
219 }
220 float_raise(float_flag_invalid , status);
221 return floatx80_default_nan(status);
222 }
223 if (aExp == 0x7FFF) {
224 if ((uint64_t) (aSig << 1)) {
225 return propagateFloatx80NaN(a, b, status);
226 }
227 return packFloatx80(aSign, floatx80_infinity.high,
228 floatx80_infinity.low);
229 }
230 if (aExp == 0) {
231 if (aSig == 0) {
232 return packFloatx80(aSign, 0, 0);
233 }
234 if (bExp < 0x3FFF) {
235 return a;
236 }
237 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
238 }
239
240 if (bExp < 0x3FFF) {
241 return a;
242 }
243
244 if (0x400F < bExp) {
245 aExp = bSign ? -0x6001 : 0xE000;
246 return roundAndPackFloatx80(status->floatx80_rounding_precision,
247 aSign, aExp, aSig, 0, status);
248 }
249
250 shiftCount = 0x403E - bExp;
251 bSig >>= shiftCount;
252 aExp = bSign ? (aExp - bSig) : (aExp + bSig);
253
254 return roundAndPackFloatx80(status->floatx80_rounding_precision,
255 aSign, aExp, aSig, 0, status);
256}
Laurent Vivier9a069772018-03-05 21:39:03 +0100257
258floatx80 floatx80_move(floatx80 a, float_status *status)
259{
260 flag aSign;
261 int32_t aExp;
262 uint64_t aSig;
263
264 aSig = extractFloatx80Frac(a);
265 aExp = extractFloatx80Exp(a);
266 aSign = extractFloatx80Sign(a);
267
268 if (aExp == 0x7FFF) {
269 if ((uint64_t)(aSig << 1)) {
270 return propagateFloatx80NaNOneArg(a, status);
271 }
272 return a;
273 }
274 if (aExp == 0) {
275 if (aSig == 0) {
276 return a;
277 }
278 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
279 aSign, aExp, aSig, 0, status);
280 }
281 return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign,
282 aExp, aSig, 0, status);
283}
Laurent Vivier4b5c65b2018-03-05 21:39:04 +0100284
285/*----------------------------------------------------------------------------
286| Algorithms for transcendental functions supported by MC68881 and MC68882
287| mathematical coprocessors. The functions are derived from FPSP library.
288*----------------------------------------------------------------------------*/
289
290#define one_exp 0x3FFF
291#define one_sig LIT64(0x8000000000000000)
292
293/*----------------------------------------------------------------------------
294 | Function for compactifying extended double-precision floating point values.
295 *----------------------------------------------------------------------------*/
296
297static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
298{
299 return (aExp << 16) | (aSig >> 48);
300}
301
302/*----------------------------------------------------------------------------
303 | Log base e of x plus 1
304 *----------------------------------------------------------------------------*/
305
306floatx80 floatx80_lognp1(floatx80 a, float_status *status)
307{
308 flag aSign;
309 int32_t aExp;
310 uint64_t aSig, fSig;
311
312 int8_t user_rnd_mode, user_rnd_prec;
313
314 int32_t compact, j, k;
315 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
316
317 aSig = extractFloatx80Frac(a);
318 aExp = extractFloatx80Exp(a);
319 aSign = extractFloatx80Sign(a);
320
321 if (aExp == 0x7FFF) {
322 if ((uint64_t) (aSig << 1)) {
323 propagateFloatx80NaNOneArg(a, status);
324 }
325 if (aSign) {
326 float_raise(float_flag_invalid, status);
327 return floatx80_default_nan(status);
328 }
329 return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low);
330 }
331
332 if (aExp == 0 && aSig == 0) {
333 return packFloatx80(aSign, 0, 0);
334 }
335
336 if (aSign && aExp >= one_exp) {
337 if (aExp == one_exp && aSig == one_sig) {
338 float_raise(float_flag_divbyzero, status);
Laurent Vivier981348a2018-04-30 19:01:55 +0200339 return packFloatx80(aSign, floatx80_infinity.high,
340 floatx80_infinity.low);
Laurent Vivier4b5c65b2018-03-05 21:39:04 +0100341 }
342 float_raise(float_flag_invalid, status);
343 return floatx80_default_nan(status);
344 }
345
346 if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) {
347 /* <= min threshold */
348 float_raise(float_flag_inexact, status);
349 return floatx80_move(a, status);
350 }
351
352 user_rnd_mode = status->float_rounding_mode;
353 user_rnd_prec = status->floatx80_rounding_precision;
354 status->float_rounding_mode = float_round_nearest_even;
355 status->floatx80_rounding_precision = 80;
356
357 compact = floatx80_make_compact(aExp, aSig);
358
359 fp0 = a; /* Z */
360 fp1 = a;
361
362 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
363 status), status); /* X = (1+Z) */
364
365 aExp = extractFloatx80Exp(fp0);
366 aSig = extractFloatx80Frac(fp0);
367
368 compact = floatx80_make_compact(aExp, aSig);
369
370 if (compact < 0x3FFE8000 || compact > 0x3FFFC000) {
371 /* |X| < 1/2 or |X| > 3/2 */
372 k = aExp - 0x3FFF;
373 fp1 = int32_to_floatx80(k, status);
374
375 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
376 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
377
378 f = packFloatx80(0, 0x3FFF, fSig); /* F */
379 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
380
381 fp0 = floatx80_sub(fp0, f, status); /* Y-F */
382
383 lp1cont1:
384 /* LP1CONT1 */
385 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
386 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
387 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
388 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
389
390 fp3 = fp2;
391 fp1 = fp2;
392
393 fp1 = floatx80_mul(fp1, float64_to_floatx80(
394 make_float64(0x3FC2499AB5E4040B), status),
395 status); /* V*A6 */
396 fp2 = floatx80_mul(fp2, float64_to_floatx80(
397 make_float64(0xBFC555B5848CB7DB), status),
398 status); /* V*A5 */
399 fp1 = floatx80_add(fp1, float64_to_floatx80(
400 make_float64(0x3FC99999987D8730), status),
401 status); /* A4+V*A6 */
402 fp2 = floatx80_add(fp2, float64_to_floatx80(
403 make_float64(0xBFCFFFFFFF6F7E97), status),
404 status); /* A3+V*A5 */
405 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
406 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
407 fp1 = floatx80_add(fp1, float64_to_floatx80(
408 make_float64(0x3FD55555555555A4), status),
409 status); /* A2+V*(A4+V*A6) */
410 fp2 = floatx80_add(fp2, float64_to_floatx80(
411 make_float64(0xBFE0000000000008), status),
412 status); /* A1+V*(A3+V*A5) */
413 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
414 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
415 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
416 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
417
418 fp1 = floatx80_add(fp1, log_tbl[j + 1],
419 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
420 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
421
422 status->float_rounding_mode = user_rnd_mode;
423 status->floatx80_rounding_precision = user_rnd_prec;
424
425 a = floatx80_add(fp0, klog2, status);
426
427 float_raise(float_flag_inexact, status);
428
429 return a;
430 } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
431 /* |X| < 1/16 or |X| > -1/16 */
432 /* LP1CARE */
433 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
434 f = packFloatx80(0, 0x3FFF, fSig); /* F */
435 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
436
437 if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */
438 /* KISZERO */
439 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000),
440 status), f, status); /* 1-F */
441 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */
442 fp1 = packFloatx80(0, 0, 0); /* K = 0 */
443 } else {
444 /* KISNEG */
445 fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000),
446 status), f, status); /* 2-F */
447 fp1 = floatx80_add(fp1, fp1, status); /* 2Z */
448 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */
449 fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */
450 }
451 goto lp1cont1;
452 } else {
453 /* LP1ONE16 */
454 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */
455 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
456 status), status); /* FP0 IS 1+X */
457
458 /* LP1CONT2 */
459 fp1 = floatx80_div(fp1, fp0, status); /* U */
460 saveu = fp1;
461 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
462 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
463
464 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
465 status); /* B5 */
466 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
467 status); /* B4 */
468 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
469 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
470 fp3 = floatx80_add(fp3, float64_to_floatx80(
471 make_float64(0x3F624924928BCCFF), status),
472 status); /* B3+W*B5 */
473 fp2 = floatx80_add(fp2, float64_to_floatx80(
474 make_float64(0x3F899999999995EC), status),
475 status); /* B2+W*B4 */
476 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
477 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
478 fp1 = floatx80_add(fp1, float64_to_floatx80(
479 make_float64(0x3FB5555555555555), status),
480 status); /* B1+W*(B3+W*B5) */
481
482 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
483 fp1 = floatx80_add(fp1, fp2,
484 status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
485 fp0 = floatx80_mul(fp0, fp1,
486 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
487
488 status->float_rounding_mode = user_rnd_mode;
489 status->floatx80_rounding_precision = user_rnd_prec;
490
491 a = floatx80_add(fp0, saveu, status);
492
493 /*if (!floatx80_is_zero(a)) { */
494 float_raise(float_flag_inexact, status);
495 /*} */
496
497 return a;
498 }
499}
Laurent Vivier50067bd2018-03-05 21:39:05 +0100500
501/*----------------------------------------------------------------------------
502 | Log base e
503 *----------------------------------------------------------------------------*/
504
505floatx80 floatx80_logn(floatx80 a, float_status *status)
506{
507 flag aSign;
508 int32_t aExp;
509 uint64_t aSig, fSig;
510
511 int8_t user_rnd_mode, user_rnd_prec;
512
513 int32_t compact, j, k, adjk;
514 floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu;
515
516 aSig = extractFloatx80Frac(a);
517 aExp = extractFloatx80Exp(a);
518 aSign = extractFloatx80Sign(a);
519
520 if (aExp == 0x7FFF) {
521 if ((uint64_t) (aSig << 1)) {
522 propagateFloatx80NaNOneArg(a, status);
523 }
524 if (aSign == 0) {
525 return packFloatx80(0, floatx80_infinity.high,
526 floatx80_infinity.low);
527 }
528 }
529
530 adjk = 0;
531
532 if (aExp == 0) {
533 if (aSig == 0) { /* zero */
534 float_raise(float_flag_divbyzero, status);
535 return packFloatx80(1, floatx80_infinity.high,
536 floatx80_infinity.low);
537 }
538 if ((aSig & one_sig) == 0) { /* denormal */
539 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
540 adjk = -100;
541 aExp += 100;
542 a = packFloatx80(aSign, aExp, aSig);
543 }
544 }
545
546 if (aSign) {
547 float_raise(float_flag_invalid, status);
548 return floatx80_default_nan(status);
549 }
550
551 user_rnd_mode = status->float_rounding_mode;
552 user_rnd_prec = status->floatx80_rounding_precision;
553 status->float_rounding_mode = float_round_nearest_even;
554 status->floatx80_rounding_precision = 80;
555
556 compact = floatx80_make_compact(aExp, aSig);
557
558 if (compact < 0x3FFEF07D || compact > 0x3FFF8841) {
559 /* |X| < 15/16 or |X| > 17/16 */
560 k = aExp - 0x3FFF;
561 k += adjk;
562 fp1 = int32_to_floatx80(k, status);
563
564 fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000);
565 j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */
566
567 f = packFloatx80(0, 0x3FFF, fSig); /* F */
568 fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */
569
570 fp0 = floatx80_sub(fp0, f, status); /* Y-F */
571
572 /* LP1CONT1 */
573 fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */
574 logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC));
575 klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */
576 fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */
577
578 fp3 = fp2;
579 fp1 = fp2;
580
581 fp1 = floatx80_mul(fp1, float64_to_floatx80(
582 make_float64(0x3FC2499AB5E4040B), status),
583 status); /* V*A6 */
584 fp2 = floatx80_mul(fp2, float64_to_floatx80(
585 make_float64(0xBFC555B5848CB7DB), status),
586 status); /* V*A5 */
587 fp1 = floatx80_add(fp1, float64_to_floatx80(
588 make_float64(0x3FC99999987D8730), status),
589 status); /* A4+V*A6 */
590 fp2 = floatx80_add(fp2, float64_to_floatx80(
591 make_float64(0xBFCFFFFFFF6F7E97), status),
592 status); /* A3+V*A5 */
593 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */
594 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */
595 fp1 = floatx80_add(fp1, float64_to_floatx80(
596 make_float64(0x3FD55555555555A4), status),
597 status); /* A2+V*(A4+V*A6) */
598 fp2 = floatx80_add(fp2, float64_to_floatx80(
599 make_float64(0xBFE0000000000008), status),
600 status); /* A1+V*(A3+V*A5) */
601 fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */
602 fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */
603 fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */
604 fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */
605
606 fp1 = floatx80_add(fp1, log_tbl[j + 1],
607 status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */
608 fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */
609
610 status->float_rounding_mode = user_rnd_mode;
611 status->floatx80_rounding_precision = user_rnd_prec;
612
613 a = floatx80_add(fp0, klog2, status);
614
615 float_raise(float_flag_inexact, status);
616
617 return a;
618 } else { /* |X-1| >= 1/16 */
619 fp0 = a;
620 fp1 = a;
621 fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000),
622 status), status); /* FP1 IS X-1 */
623 fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
624 status), status); /* FP0 IS X+1 */
625 fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */
626
627 /* LP1CONT2 */
628 fp1 = floatx80_div(fp1, fp0, status); /* U */
629 saveu = fp1;
630 fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */
631 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */
632
633 fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6),
634 status); /* B5 */
635 fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0),
636 status); /* B4 */
637 fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */
638 fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */
639 fp3 = floatx80_add(fp3, float64_to_floatx80(
640 make_float64(0x3F624924928BCCFF), status),
641 status); /* B3+W*B5 */
642 fp2 = floatx80_add(fp2, float64_to_floatx80(
643 make_float64(0x3F899999999995EC), status),
644 status); /* B2+W*B4 */
645 fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */
646 fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */
647 fp1 = floatx80_add(fp1, float64_to_floatx80(
648 make_float64(0x3FB5555555555555), status),
649 status); /* B1+W*(B3+W*B5) */
650
651 fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */
652 fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */
653 fp0 = floatx80_mul(fp0, fp1,
654 status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */
655
656 status->float_rounding_mode = user_rnd_mode;
657 status->floatx80_rounding_precision = user_rnd_prec;
658
659 a = floatx80_add(fp0, saveu, status);
660
661 /*if (!floatx80_is_zero(a)) { */
662 float_raise(float_flag_inexact, status);
663 /*} */
664
665 return a;
666 }
667}
Laurent Vivier248efb62018-03-05 21:39:06 +0100668
669/*----------------------------------------------------------------------------
670 | Log base 10
671 *----------------------------------------------------------------------------*/
672
673floatx80 floatx80_log10(floatx80 a, float_status *status)
674{
675 flag aSign;
676 int32_t aExp;
677 uint64_t aSig;
678
679 int8_t user_rnd_mode, user_rnd_prec;
680
681 floatx80 fp0, fp1;
682
683 aSig = extractFloatx80Frac(a);
684 aExp = extractFloatx80Exp(a);
685 aSign = extractFloatx80Sign(a);
686
687 if (aExp == 0x7FFF) {
688 if ((uint64_t) (aSig << 1)) {
689 propagateFloatx80NaNOneArg(a, status);
690 }
691 if (aSign == 0) {
692 return packFloatx80(0, floatx80_infinity.high,
693 floatx80_infinity.low);
694 }
695 }
696
697 if (aExp == 0 && aSig == 0) {
698 float_raise(float_flag_divbyzero, status);
699 return packFloatx80(1, floatx80_infinity.high,
700 floatx80_infinity.low);
701 }
702
703 if (aSign) {
704 float_raise(float_flag_invalid, status);
705 return floatx80_default_nan(status);
706 }
707
708 user_rnd_mode = status->float_rounding_mode;
709 user_rnd_prec = status->floatx80_rounding_precision;
710 status->float_rounding_mode = float_round_nearest_even;
711 status->floatx80_rounding_precision = 80;
712
713 fp0 = floatx80_logn(a, status);
714 fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); /* INV_L10 */
715
716 status->float_rounding_mode = user_rnd_mode;
717 status->floatx80_rounding_precision = user_rnd_prec;
718
719 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */
720
721 float_raise(float_flag_inexact, status);
722
723 return a;
724}
Laurent Vivier67b453e2018-03-05 21:39:07 +0100725
726/*----------------------------------------------------------------------------
727 | Log base 2
728 *----------------------------------------------------------------------------*/
729
730floatx80 floatx80_log2(floatx80 a, float_status *status)
731{
732 flag aSign;
733 int32_t aExp;
734 uint64_t aSig;
735
736 int8_t user_rnd_mode, user_rnd_prec;
737
738 floatx80 fp0, fp1;
739
740 aSig = extractFloatx80Frac(a);
741 aExp = extractFloatx80Exp(a);
742 aSign = extractFloatx80Sign(a);
743
744 if (aExp == 0x7FFF) {
745 if ((uint64_t) (aSig << 1)) {
746 propagateFloatx80NaNOneArg(a, status);
747 }
748 if (aSign == 0) {
749 return packFloatx80(0, floatx80_infinity.high,
750 floatx80_infinity.low);
751 }
752 }
753
754 if (aExp == 0) {
755 if (aSig == 0) {
756 float_raise(float_flag_divbyzero, status);
757 return packFloatx80(1, floatx80_infinity.high,
758 floatx80_infinity.low);
759 }
760 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
761 }
762
763 if (aSign) {
764 float_raise(float_flag_invalid, status);
765 return floatx80_default_nan(status);
766 }
767
768 user_rnd_mode = status->float_rounding_mode;
769 user_rnd_prec = status->floatx80_rounding_precision;
770 status->float_rounding_mode = float_round_nearest_even;
771 status->floatx80_rounding_precision = 80;
772
773 if (aSig == one_sig) { /* X is 2^k */
774 status->float_rounding_mode = user_rnd_mode;
775 status->floatx80_rounding_precision = user_rnd_prec;
776
777 a = int32_to_floatx80(aExp - 0x3FFF, status);
778 } else {
779 fp0 = floatx80_logn(a, status);
780 fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); /* INV_L2 */
781
782 status->float_rounding_mode = user_rnd_mode;
783 status->floatx80_rounding_precision = user_rnd_prec;
784
785 a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */
786 }
787
788 float_raise(float_flag_inexact, status);
789
790 return a;
791}
Laurent Vivier40ad0872018-03-05 21:39:08 +0100792
793/*----------------------------------------------------------------------------
794 | e to x
795 *----------------------------------------------------------------------------*/
796
797floatx80 floatx80_etox(floatx80 a, float_status *status)
798{
799 flag aSign;
800 int32_t aExp;
801 uint64_t aSig;
802
803 int8_t user_rnd_mode, user_rnd_prec;
804
805 int32_t compact, n, j, k, m, m1;
806 floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
807 flag adjflag;
808
809 aSig = extractFloatx80Frac(a);
810 aExp = extractFloatx80Exp(a);
811 aSign = extractFloatx80Sign(a);
812
813 if (aExp == 0x7FFF) {
814 if ((uint64_t) (aSig << 1)) {
815 return propagateFloatx80NaNOneArg(a, status);
816 }
817 if (aSign) {
818 return packFloatx80(0, 0, 0);
819 }
820 return packFloatx80(0, floatx80_infinity.high,
821 floatx80_infinity.low);
822 }
823
824 if (aExp == 0 && aSig == 0) {
825 return packFloatx80(0, one_exp, one_sig);
826 }
827
828 user_rnd_mode = status->float_rounding_mode;
829 user_rnd_prec = status->floatx80_rounding_precision;
830 status->float_rounding_mode = float_round_nearest_even;
831 status->floatx80_rounding_precision = 80;
832
833 adjflag = 0;
834
835 if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
836 compact = floatx80_make_compact(aExp, aSig);
837
838 if (compact < 0x400CB167) { /* |X| < 16380 log2 */
839 fp0 = a;
840 fp1 = a;
841 fp0 = floatx80_mul(fp0, float32_to_floatx80(
842 make_float32(0x42B8AA3B), status),
843 status); /* 64/log2 * X */
844 adjflag = 0;
845 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
846 fp0 = int32_to_floatx80(n, status);
847
848 j = n & 0x3F; /* J = N mod 64 */
849 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
850 if (n < 0 && j) {
851 /* arithmetic right shift is division and
852 * round towards minus infinity
853 */
854 m--;
855 }
856 m += 0x3FFF; /* biased exponent of 2^(M) */
857
858 expcont1:
859 fp2 = fp0; /* N */
860 fp0 = floatx80_mul(fp0, float32_to_floatx80(
861 make_float32(0xBC317218), status),
862 status); /* N * L1, L1 = lead(-log2/64) */
863 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
864 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
865 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
866 fp0 = floatx80_add(fp0, fp2, status); /* R */
867
868 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
869 fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
870 status); /* A5 */
871 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
872 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
873 status), fp1,
874 status); /* fp3 is S*A4 */
875 fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
876 0x3FA5555555554431), status),
877 status); /* fp2 is A3+S*A5 */
878 fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
879 0x3FC5555555554018), status),
880 status); /* fp3 is A2+S*A4 */
881 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
882 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
883 fp2 = floatx80_add(fp2, float32_to_floatx80(
884 make_float32(0x3F000000), status),
885 status); /* fp2 is A1+S*(A3+S*A5) */
886 fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
887 fp2 = floatx80_mul(fp2, fp1,
888 status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
889 fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
890 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
891
892 fp1 = exp_tbl[j];
893 fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
894 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
895 status); /* accurate 2^(J/64) */
896 fp0 = floatx80_add(fp0, fp1,
897 status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */
898
899 scale = packFloatx80(0, m, one_sig);
900 if (adjflag) {
901 adjscale = packFloatx80(0, m1, one_sig);
902 fp0 = floatx80_mul(fp0, adjscale, status);
903 }
904
905 status->float_rounding_mode = user_rnd_mode;
906 status->floatx80_rounding_precision = user_rnd_prec;
907
908 a = floatx80_mul(fp0, scale, status);
909
910 float_raise(float_flag_inexact, status);
911
912 return a;
913 } else { /* |X| >= 16380 log2 */
914 if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
915 status->float_rounding_mode = user_rnd_mode;
916 status->floatx80_rounding_precision = user_rnd_prec;
917 if (aSign) {
918 a = roundAndPackFloatx80(
919 status->floatx80_rounding_precision,
920 0, -0x1000, aSig, 0, status);
921 } else {
922 a = roundAndPackFloatx80(
923 status->floatx80_rounding_precision,
924 0, 0x8000, aSig, 0, status);
925 }
926 float_raise(float_flag_inexact, status);
927
928 return a;
929 } else {
930 fp0 = a;
931 fp1 = a;
932 fp0 = floatx80_mul(fp0, float32_to_floatx80(
933 make_float32(0x42B8AA3B), status),
934 status); /* 64/log2 * X */
935 adjflag = 1;
936 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
937 fp0 = int32_to_floatx80(n, status);
938
939 j = n & 0x3F; /* J = N mod 64 */
940 /* NOTE: this is really arithmetic right shift by 6 */
941 k = n / 64;
942 if (n < 0 && j) {
943 /* arithmetic right shift is division and
944 * round towards minus infinity
945 */
946 k--;
947 }
948 /* NOTE: this is really arithmetic right shift by 1 */
949 m1 = k / 2;
950 if (k < 0 && (k & 1)) {
951 /* arithmetic right shift is division and
952 * round towards minus infinity
953 */
954 m1--;
955 }
956 m = k - m1;
957 m1 += 0x3FFF; /* biased exponent of 2^(M1) */
958 m += 0x3FFF; /* biased exponent of 2^(M) */
959
960 goto expcont1;
961 }
962 }
963 } else { /* |X| < 2^(-65) */
964 status->float_rounding_mode = user_rnd_mode;
965 status->floatx80_rounding_precision = user_rnd_prec;
966
967 a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
968 status), status); /* 1 + X */
969
970 float_raise(float_flag_inexact, status);
971
972 return a;
973 }
974}
Laurent Vivier068f1612018-03-05 21:39:09 +0100975
976/*----------------------------------------------------------------------------
977 | 2 to x
978 *----------------------------------------------------------------------------*/
979
980floatx80 floatx80_twotox(floatx80 a, float_status *status)
981{
982 flag aSign;
983 int32_t aExp;
984 uint64_t aSig;
985
986 int8_t user_rnd_mode, user_rnd_prec;
987
988 int32_t compact, n, j, l, m, m1;
989 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
990
991 aSig = extractFloatx80Frac(a);
992 aExp = extractFloatx80Exp(a);
993 aSign = extractFloatx80Sign(a);
994
995 if (aExp == 0x7FFF) {
996 if ((uint64_t) (aSig << 1)) {
997 return propagateFloatx80NaNOneArg(a, status);
998 }
999 if (aSign) {
1000 return packFloatx80(0, 0, 0);
1001 }
1002 return packFloatx80(0, floatx80_infinity.high,
1003 floatx80_infinity.low);
1004 }
1005
1006 if (aExp == 0 && aSig == 0) {
1007 return packFloatx80(0, one_exp, one_sig);
1008 }
1009
1010 user_rnd_mode = status->float_rounding_mode;
1011 user_rnd_prec = status->floatx80_rounding_precision;
1012 status->float_rounding_mode = float_round_nearest_even;
1013 status->floatx80_rounding_precision = 80;
1014
1015 fp0 = a;
1016
1017 compact = floatx80_make_compact(aExp, aSig);
1018
1019 if (compact < 0x3FB98000 || compact > 0x400D80C0) {
1020 /* |X| > 16480 or |X| < 2^(-70) */
1021 if (compact > 0x3FFF8000) { /* |X| > 16480 */
1022 status->float_rounding_mode = user_rnd_mode;
1023 status->floatx80_rounding_precision = user_rnd_prec;
1024
1025 if (aSign) {
1026 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1027 0, -0x1000, aSig, 0, status);
1028 } else {
1029 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1030 0, 0x8000, aSig, 0, status);
1031 }
1032 } else { /* |X| < 2^(-70) */
1033 status->float_rounding_mode = user_rnd_mode;
1034 status->floatx80_rounding_precision = user_rnd_prec;
1035
1036 a = floatx80_add(fp0, float32_to_floatx80(
1037 make_float32(0x3F800000), status),
1038 status); /* 1 + X */
1039
1040 float_raise(float_flag_inexact, status);
1041
1042 return a;
1043 }
1044 } else { /* 2^(-70) <= |X| <= 16480 */
1045 fp1 = fp0; /* X */
1046 fp1 = floatx80_mul(fp1, float32_to_floatx80(
1047 make_float32(0x42800000), status),
1048 status); /* X * 64 */
1049 n = floatx80_to_int32(fp1, status);
1050 fp1 = int32_to_floatx80(n, status);
1051 j = n & 0x3F;
1052 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
1053 if (n < 0 && j) {
1054 /* arithmetic right shift is division and
1055 * round towards minus infinity
1056 */
1057 l--;
1058 }
1059 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
1060 if (l < 0 && (l & 1)) {
1061 /* arithmetic right shift is division and
1062 * round towards minus infinity
1063 */
1064 m--;
1065 }
1066 m1 = l - m;
1067 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
1068
1069 adjfact = packFloatx80(0, m1, one_sig);
1070 fact1 = exp2_tbl[j];
1071 fact1.high += m;
1072 fact2.high = exp2_tbl2[j] >> 16;
1073 fact2.high += m;
1074 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1075 fact2.low <<= 48;
1076
1077 fp1 = floatx80_mul(fp1, float32_to_floatx80(
1078 make_float32(0x3C800000), status),
1079 status); /* (1/64)*N */
1080 fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */
1081 fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); /* LOG2 */
1082 fp0 = floatx80_mul(fp0, fp2, status); /* R */
1083
1084 /* EXPR */
1085 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1086 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1087 status); /* A5 */
1088 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1089 status); /* A4 */
1090 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1091 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1092 fp2 = floatx80_add(fp2, float64_to_floatx80(
1093 make_float64(0x3FA5555555554CC1), status),
1094 status); /* A3+S*A5 */
1095 fp3 = floatx80_add(fp3, float64_to_floatx80(
1096 make_float64(0x3FC5555555554A54), status),
1097 status); /* A2+S*A4 */
1098 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1099 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1100 fp2 = floatx80_add(fp2, float64_to_floatx80(
1101 make_float64(0x3FE0000000000000), status),
1102 status); /* A1+S*(A3+S*A5) */
1103 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1104
1105 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1106 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1107 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1108
1109 fp0 = floatx80_mul(fp0, fact1, status);
1110 fp0 = floatx80_add(fp0, fact2, status);
1111 fp0 = floatx80_add(fp0, fact1, status);
1112
1113 status->float_rounding_mode = user_rnd_mode;
1114 status->floatx80_rounding_precision = user_rnd_prec;
1115
1116 a = floatx80_mul(fp0, adjfact, status);
1117
1118 float_raise(float_flag_inexact, status);
1119
1120 return a;
1121 }
1122}
Laurent Vivier6c25be62018-03-05 21:39:10 +01001123
1124/*----------------------------------------------------------------------------
1125 | 10 to x
1126 *----------------------------------------------------------------------------*/
1127
1128floatx80 floatx80_tentox(floatx80 a, float_status *status)
1129{
1130 flag aSign;
1131 int32_t aExp;
1132 uint64_t aSig;
1133
1134 int8_t user_rnd_mode, user_rnd_prec;
1135
1136 int32_t compact, n, j, l, m, m1;
1137 floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2;
1138
1139 aSig = extractFloatx80Frac(a);
1140 aExp = extractFloatx80Exp(a);
1141 aSign = extractFloatx80Sign(a);
1142
1143 if (aExp == 0x7FFF) {
1144 if ((uint64_t) (aSig << 1)) {
1145 return propagateFloatx80NaNOneArg(a, status);
1146 }
1147 if (aSign) {
1148 return packFloatx80(0, 0, 0);
1149 }
1150 return packFloatx80(0, floatx80_infinity.high,
1151 floatx80_infinity.low);
1152 }
1153
1154 if (aExp == 0 && aSig == 0) {
1155 return packFloatx80(0, one_exp, one_sig);
1156 }
1157
1158 user_rnd_mode = status->float_rounding_mode;
1159 user_rnd_prec = status->floatx80_rounding_precision;
1160 status->float_rounding_mode = float_round_nearest_even;
1161 status->floatx80_rounding_precision = 80;
1162
1163 fp0 = a;
1164
1165 compact = floatx80_make_compact(aExp, aSig);
1166
1167 if (compact < 0x3FB98000 || compact > 0x400B9B07) {
1168 /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */
1169 if (compact > 0x3FFF8000) { /* |X| > 16480 */
1170 status->float_rounding_mode = user_rnd_mode;
1171 status->floatx80_rounding_precision = user_rnd_prec;
1172
1173 if (aSign) {
1174 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1175 0, -0x1000, aSig, 0, status);
1176 } else {
1177 return roundAndPackFloatx80(status->floatx80_rounding_precision,
1178 0, 0x8000, aSig, 0, status);
1179 }
1180 } else { /* |X| < 2^(-70) */
1181 status->float_rounding_mode = user_rnd_mode;
1182 status->floatx80_rounding_precision = user_rnd_prec;
1183
1184 a = floatx80_add(fp0, float32_to_floatx80(
1185 make_float32(0x3F800000), status),
1186 status); /* 1 + X */
1187
1188 float_raise(float_flag_inexact, status);
1189
1190 return a;
1191 }
1192 } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */
1193 fp1 = fp0; /* X */
1194 fp1 = floatx80_mul(fp1, float64_to_floatx80(
1195 make_float64(0x406A934F0979A371),
1196 status), status); /* X*64*LOG10/LOG2 */
1197 n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */
1198 fp1 = int32_to_floatx80(n, status);
1199
1200 j = n & 0x3F;
1201 l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
1202 if (n < 0 && j) {
1203 /* arithmetic right shift is division and
1204 * round towards minus infinity
1205 */
1206 l--;
1207 }
1208 m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */
1209 if (l < 0 && (l & 1)) {
1210 /* arithmetic right shift is division and
1211 * round towards minus infinity
1212 */
1213 m--;
1214 }
1215 m1 = l - m;
1216 m1 += 0x3FFF; /* ADJFACT IS 2^(M') */
1217
1218 adjfact = packFloatx80(0, m1, one_sig);
1219 fact1 = exp2_tbl[j];
1220 fact1.high += m;
1221 fact2.high = exp2_tbl2[j] >> 16;
1222 fact2.high += m;
1223 fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF);
1224 fact2.low <<= 48;
1225
1226 fp2 = fp1; /* N */
1227 fp1 = floatx80_mul(fp1, float64_to_floatx80(
1228 make_float64(0x3F734413509F8000), status),
1229 status); /* N*(LOG2/64LOG10)_LEAD */
1230 fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2));
1231 fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */
1232 fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */
1233 fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */
1234 fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); /* LOG10 */
1235 fp0 = floatx80_mul(fp0, fp2, status); /* R */
1236
1237 /* EXPR */
1238 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1239 fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2),
1240 status); /* A5 */
1241 fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C),
1242 status); /* A4 */
1243 fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */
1244 fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */
1245 fp2 = floatx80_add(fp2, float64_to_floatx80(
1246 make_float64(0x3FA5555555554CC1), status),
1247 status); /* A3+S*A5 */
1248 fp3 = floatx80_add(fp3, float64_to_floatx80(
1249 make_float64(0x3FC5555555554A54), status),
1250 status); /* A2+S*A4 */
1251 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */
1252 fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */
1253 fp2 = floatx80_add(fp2, float64_to_floatx80(
1254 make_float64(0x3FE0000000000000), status),
1255 status); /* A1+S*(A3+S*A5) */
1256 fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */
1257
1258 fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */
1259 fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */
1260 fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */
1261
1262 fp0 = floatx80_mul(fp0, fact1, status);
1263 fp0 = floatx80_add(fp0, fact2, status);
1264 fp0 = floatx80_add(fp0, fact1, status);
1265
1266 status->float_rounding_mode = user_rnd_mode;
1267 status->floatx80_rounding_precision = user_rnd_prec;
1268
1269 a = floatx80_mul(fp0, adjfact, status);
1270
1271 float_raise(float_flag_inexact, status);
1272
1273 return a;
1274 }
1275}
Laurent Vivier27340182018-03-12 21:27:18 +01001276
1277/*----------------------------------------------------------------------------
1278 | Tangent
1279 *----------------------------------------------------------------------------*/
1280
1281floatx80 floatx80_tan(floatx80 a, float_status *status)
1282{
1283 flag aSign, xSign;
1284 int32_t aExp, xExp;
1285 uint64_t aSig, xSig;
1286
1287 int8_t user_rnd_mode, user_rnd_prec;
1288
1289 int32_t compact, l, n, j;
1290 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2;
1291 float32 twoto63;
1292 flag endflag;
1293
1294 aSig = extractFloatx80Frac(a);
1295 aExp = extractFloatx80Exp(a);
1296 aSign = extractFloatx80Sign(a);
1297
1298 if (aExp == 0x7FFF) {
1299 if ((uint64_t) (aSig << 1)) {
1300 return propagateFloatx80NaNOneArg(a, status);
1301 }
1302 float_raise(float_flag_invalid, status);
1303 return floatx80_default_nan(status);
1304 }
1305
1306 if (aExp == 0 && aSig == 0) {
1307 return packFloatx80(aSign, 0, 0);
1308 }
1309
1310 user_rnd_mode = status->float_rounding_mode;
1311 user_rnd_prec = status->floatx80_rounding_precision;
1312 status->float_rounding_mode = float_round_nearest_even;
1313 status->floatx80_rounding_precision = 80;
1314
1315 compact = floatx80_make_compact(aExp, aSig);
1316
1317 fp0 = a;
1318
1319 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1320 /* 2^(-40) > |X| > 15 PI */
1321 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1322 /* REDUCEX */
1323 fp1 = packFloatx80(0, 0, 0);
1324 if (compact == 0x7FFEFFFF) {
1325 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1326 LIT64(0xC90FDAA200000000));
1327 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1328 LIT64(0x85A308D300000000));
1329 fp0 = floatx80_add(fp0, twopi1, status);
1330 fp1 = fp0;
1331 fp0 = floatx80_add(fp0, twopi2, status);
1332 fp1 = floatx80_sub(fp1, fp0, status);
1333 fp1 = floatx80_add(fp1, twopi2, status);
1334 }
1335 loop:
1336 xSign = extractFloatx80Sign(fp0);
1337 xExp = extractFloatx80Exp(fp0);
1338 xExp -= 0x3FFF;
1339 if (xExp <= 28) {
1340 l = 0;
1341 endflag = 1;
1342 } else {
1343 l = xExp - 27;
1344 endflag = 0;
1345 }
1346 invtwopi = packFloatx80(0, 0x3FFE - l,
1347 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
1348 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
1349 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));
1350
1351 /* SIGN(INARG)*2^63 IN SGL */
1352 twoto63 = packFloat32(xSign, 0xBE, 0);
1353
1354 fp2 = floatx80_mul(fp0, invtwopi, status);
1355 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1356 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1357 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1358 status); /* FP2 is N */
1359 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1360 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1361 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1362 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1363 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1364 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1365 fp3 = fp0; /* FP3 is A */
1366 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1367 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1368
1369 if (endflag > 0) {
1370 n = floatx80_to_int32(fp2, status);
1371 goto tancont;
1372 }
1373 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1374 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1375 goto loop;
1376 } else {
1377 status->float_rounding_mode = user_rnd_mode;
1378 status->floatx80_rounding_precision = user_rnd_prec;
1379
1380 a = floatx80_move(a, status);
1381
1382 float_raise(float_flag_inexact, status);
1383
1384 return a;
1385 }
1386 } else {
1387 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1388 make_float64(0x3FE45F306DC9C883), status),
1389 status); /* X*2/PI */
1390
1391 n = floatx80_to_int32(fp1, status);
1392 j = 32 + n;
1393
1394 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1395 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1396 status); /* FP0 IS R = (X-Y1)-Y2 */
1397
1398 tancont:
1399 if (n & 1) {
1400 /* NODD */
1401 fp1 = fp0; /* R */
1402 fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1403 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1404 status); /* Q4 */
1405 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1406 status); /* P3 */
1407 fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */
1408 fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */
1409 fp3 = floatx80_add(fp3, float64_to_floatx80(
1410 make_float64(0xBF346F59B39BA65F), status),
1411 status); /* Q3+SQ4 */
1412 fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
1413 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1414 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */
1415 fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */
1416 fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
1417 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1418 fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
1419 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1420 fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */
1421 fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */
1422 fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
1423 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1424 fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */
1425 fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1426 fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1427 fp0 = floatx80_add(fp0, float32_to_floatx80(
1428 make_float32(0x3F800000), status),
1429 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1430
1431 xSign = extractFloatx80Sign(fp1);
1432 xExp = extractFloatx80Exp(fp1);
1433 xSig = extractFloatx80Frac(fp1);
1434 xSign ^= 1;
1435 fp1 = packFloatx80(xSign, xExp, xSig);
1436
1437 status->float_rounding_mode = user_rnd_mode;
1438 status->floatx80_rounding_precision = user_rnd_prec;
1439
1440 a = floatx80_div(fp0, fp1, status);
1441
1442 float_raise(float_flag_inexact, status);
1443
1444 return a;
1445 } else {
1446 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
1447 fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688),
1448 status); /* Q4 */
1449 fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04),
1450 status); /* P3 */
1451 fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */
1452 fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */
1453 fp3 = floatx80_add(fp3, float64_to_floatx80(
1454 make_float64(0xBF346F59B39BA65F), status),
1455 status); /* Q3+SQ4 */
1456 fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00));
1457 fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */
1458 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */
1459 fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */
1460 fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1));
1461 fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */
1462 fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA));
1463 fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */
1464 fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */
1465 fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */
1466 fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE));
1467 fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */
1468 fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */
1469 fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */
1470 fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */
1471 fp1 = floatx80_add(fp1, float32_to_floatx80(
1472 make_float32(0x3F800000), status),
1473 status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */
1474
1475 status->float_rounding_mode = user_rnd_mode;
1476 status->floatx80_rounding_precision = user_rnd_prec;
1477
1478 a = floatx80_div(fp0, fp1, status);
1479
1480 float_raise(float_flag_inexact, status);
1481
1482 return a;
1483 }
1484 }
1485}
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001486
1487/*----------------------------------------------------------------------------
1488 | Sine
1489 *----------------------------------------------------------------------------*/
1490
1491floatx80 floatx80_sin(floatx80 a, float_status *status)
1492{
1493 flag aSign, xSign;
1494 int32_t aExp, xExp;
1495 uint64_t aSig, xSig;
1496
1497 int8_t user_rnd_mode, user_rnd_prec;
1498
1499 int32_t compact, l, n, j;
1500 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1501 float32 posneg1, twoto63;
Laurent Vivier6361d292018-04-30 19:01:56 +02001502 flag endflag;
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001503
1504 aSig = extractFloatx80Frac(a);
1505 aExp = extractFloatx80Exp(a);
1506 aSign = extractFloatx80Sign(a);
1507
1508 if (aExp == 0x7FFF) {
1509 if ((uint64_t) (aSig << 1)) {
1510 return propagateFloatx80NaNOneArg(a, status);
1511 }
1512 float_raise(float_flag_invalid, status);
1513 return floatx80_default_nan(status);
1514 }
1515
1516 if (aExp == 0 && aSig == 0) {
1517 return packFloatx80(aSign, 0, 0);
1518 }
1519
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001520 user_rnd_mode = status->float_rounding_mode;
1521 user_rnd_prec = status->floatx80_rounding_precision;
1522 status->float_rounding_mode = float_round_nearest_even;
1523 status->floatx80_rounding_precision = 80;
1524
1525 compact = floatx80_make_compact(aExp, aSig);
1526
1527 fp0 = a;
1528
1529 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1530 /* 2^(-40) > |X| > 15 PI */
1531 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1532 /* REDUCEX */
1533 fp1 = packFloatx80(0, 0, 0);
1534 if (compact == 0x7FFEFFFF) {
1535 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1536 LIT64(0xC90FDAA200000000));
1537 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1538 LIT64(0x85A308D300000000));
1539 fp0 = floatx80_add(fp0, twopi1, status);
1540 fp1 = fp0;
1541 fp0 = floatx80_add(fp0, twopi2, status);
1542 fp1 = floatx80_sub(fp1, fp0, status);
1543 fp1 = floatx80_add(fp1, twopi2, status);
1544 }
1545 loop:
1546 xSign = extractFloatx80Sign(fp0);
1547 xExp = extractFloatx80Exp(fp0);
1548 xExp -= 0x3FFF;
1549 if (xExp <= 28) {
1550 l = 0;
1551 endflag = 1;
1552 } else {
1553 l = xExp - 27;
1554 endflag = 0;
1555 }
1556 invtwopi = packFloatx80(0, 0x3FFE - l,
1557 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
1558 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
1559 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));
1560
1561 /* SIGN(INARG)*2^63 IN SGL */
1562 twoto63 = packFloat32(xSign, 0xBE, 0);
1563
1564 fp2 = floatx80_mul(fp0, invtwopi, status);
1565 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1566 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1567 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1568 status); /* FP2 is N */
1569 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1570 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1571 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1572 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1573 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1574 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1575 fp3 = fp0; /* FP3 is A */
1576 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1577 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1578
1579 if (endflag > 0) {
1580 n = floatx80_to_int32(fp2, status);
1581 goto sincont;
1582 }
1583 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1584 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1585 goto loop;
1586 } else {
1587 /* SINSM */
1588 fp0 = float32_to_floatx80(make_float32(0x3F800000),
1589 status); /* 1 */
1590
1591 status->float_rounding_mode = user_rnd_mode;
1592 status->floatx80_rounding_precision = user_rnd_prec;
1593
Laurent Vivier6361d292018-04-30 19:01:56 +02001594 /* SINTINY */
1595 a = floatx80_move(a, status);
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001596 float_raise(float_flag_inexact, status);
1597
1598 return a;
1599 }
1600 } else {
1601 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1602 make_float64(0x3FE45F306DC9C883), status),
1603 status); /* X*2/PI */
1604
1605 n = floatx80_to_int32(fp1, status);
1606 j = 32 + n;
1607
1608 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1609 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1610 status); /* FP0 IS R = (X-Y1)-Y2 */
1611
1612 sincont:
Laurent Vivier6361d292018-04-30 19:01:56 +02001613 if (n & 1) {
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001614 /* COSPOLY */
1615 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1616 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1617 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1618 status); /* B8 */
1619 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1620 status); /* B7 */
1621
1622 xSign = extractFloatx80Sign(fp0); /* X IS S */
1623 xExp = extractFloatx80Exp(fp0);
1624 xSig = extractFloatx80Frac(fp0);
1625
Laurent Vivier6361d292018-04-30 19:01:56 +02001626 if ((n >> 1) & 1) {
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001627 xSign ^= 1;
1628 posneg1 = make_float32(0xBF800000); /* -1 */
1629 } else {
1630 xSign ^= 0;
1631 posneg1 = make_float32(0x3F800000); /* 1 */
1632 } /* X IS NOW R'= SGN*R */
1633
1634 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1635 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1636 fp2 = floatx80_add(fp2, float64_to_floatx80(
1637 make_float64(0x3E21EED90612C972), status),
1638 status); /* B6+TB8 */
1639 fp3 = floatx80_add(fp3, float64_to_floatx80(
1640 make_float64(0xBE927E4FB79D9FCF), status),
1641 status); /* B5+TB7 */
1642 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1643 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1644 fp2 = floatx80_add(fp2, float64_to_floatx80(
1645 make_float64(0x3EFA01A01A01D423), status),
1646 status); /* B4+T(B6+TB8) */
1647 fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
1648 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1649 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1650 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1651 fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
1652 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1653 fp1 = floatx80_add(fp1, float32_to_floatx80(
1654 make_float32(0xBF000000), status),
1655 status); /* B1+T(B3+T(B5+TB7)) */
1656 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1657 fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+
1658 * [S(B2+T(B4+T(B6+TB8)))]
1659 */
1660
1661 x = packFloatx80(xSign, xExp, xSig);
1662 fp0 = floatx80_mul(fp0, x, status);
1663
1664 status->float_rounding_mode = user_rnd_mode;
1665 status->floatx80_rounding_precision = user_rnd_prec;
1666
1667 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1668
1669 float_raise(float_flag_inexact, status);
1670
1671 return a;
1672 } else {
1673 /* SINPOLY */
1674 xSign = extractFloatx80Sign(fp0); /* X IS R */
1675 xExp = extractFloatx80Exp(fp0);
1676 xSig = extractFloatx80Frac(fp0);
1677
Laurent Vivier6361d292018-04-30 19:01:56 +02001678 xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */
Laurent Vivier5add1ac2018-03-12 21:27:19 +01001679
1680 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1681 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1682 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1683 status); /* A7 */
1684 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1685 status); /* A6 */
1686 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1687 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1688 fp3 = floatx80_add(fp3, float64_to_floatx80(
1689 make_float64(0xBE5AE6452A118AE4), status),
1690 status); /* A5+T*A7 */
1691 fp2 = floatx80_add(fp2, float64_to_floatx80(
1692 make_float64(0x3EC71DE3A5341531), status),
1693 status); /* A4+T*A6 */
1694 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1695 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1696 fp3 = floatx80_add(fp3, float64_to_floatx80(
1697 make_float64(0xBF2A01A01A018B59), status),
1698 status); /* A3+T(A5+TA7) */
1699 fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
1700 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1701 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1702 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1703 fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
1704 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1705 fp1 = floatx80_add(fp1, fp2,
1706 status); /* [A1+T(A3+T(A5+TA7))]+
1707 * [S(A2+T(A4+TA6))]
1708 */
1709
1710 x = packFloatx80(xSign, xExp, xSig);
1711 fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1712 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1713
1714 status->float_rounding_mode = user_rnd_mode;
1715 status->floatx80_rounding_precision = user_rnd_prec;
1716
1717 a = floatx80_add(fp0, x, status);
1718
1719 float_raise(float_flag_inexact, status);
1720
1721 return a;
1722 }
1723 }
1724}
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001725
1726/*----------------------------------------------------------------------------
1727 | Cosine
1728 *----------------------------------------------------------------------------*/
1729
1730floatx80 floatx80_cos(floatx80 a, float_status *status)
1731{
1732 flag aSign, xSign;
1733 int32_t aExp, xExp;
1734 uint64_t aSig, xSig;
1735
1736 int8_t user_rnd_mode, user_rnd_prec;
1737
1738 int32_t compact, l, n, j;
1739 floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2;
1740 float32 posneg1, twoto63;
Laurent Vivier6361d292018-04-30 19:01:56 +02001741 flag endflag;
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001742
1743 aSig = extractFloatx80Frac(a);
1744 aExp = extractFloatx80Exp(a);
1745 aSign = extractFloatx80Sign(a);
1746
1747 if (aExp == 0x7FFF) {
1748 if ((uint64_t) (aSig << 1)) {
1749 return propagateFloatx80NaNOneArg(a, status);
1750 }
1751 float_raise(float_flag_invalid, status);
1752 return floatx80_default_nan(status);
1753 }
1754
1755 if (aExp == 0 && aSig == 0) {
1756 return packFloatx80(0, one_exp, one_sig);
1757 }
1758
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001759 user_rnd_mode = status->float_rounding_mode;
1760 user_rnd_prec = status->floatx80_rounding_precision;
1761 status->float_rounding_mode = float_round_nearest_even;
1762 status->floatx80_rounding_precision = 80;
1763
1764 compact = floatx80_make_compact(aExp, aSig);
1765
1766 fp0 = a;
1767
1768 if (compact < 0x3FD78000 || compact > 0x4004BC7E) {
1769 /* 2^(-40) > |X| > 15 PI */
1770 if (compact > 0x3FFF8000) { /* |X| >= 15 PI */
1771 /* REDUCEX */
1772 fp1 = packFloatx80(0, 0, 0);
1773 if (compact == 0x7FFEFFFF) {
1774 twopi1 = packFloatx80(aSign ^ 1, 0x7FFE,
1775 LIT64(0xC90FDAA200000000));
1776 twopi2 = packFloatx80(aSign ^ 1, 0x7FDC,
1777 LIT64(0x85A308D300000000));
1778 fp0 = floatx80_add(fp0, twopi1, status);
1779 fp1 = fp0;
1780 fp0 = floatx80_add(fp0, twopi2, status);
1781 fp1 = floatx80_sub(fp1, fp0, status);
1782 fp1 = floatx80_add(fp1, twopi2, status);
1783 }
1784 loop:
1785 xSign = extractFloatx80Sign(fp0);
1786 xExp = extractFloatx80Exp(fp0);
1787 xExp -= 0x3FFF;
1788 if (xExp <= 28) {
1789 l = 0;
1790 endflag = 1;
1791 } else {
1792 l = xExp - 27;
1793 endflag = 0;
1794 }
1795 invtwopi = packFloatx80(0, 0x3FFE - l,
1796 LIT64(0xA2F9836E4E44152A)); /* INVTWOPI */
1797 twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000));
1798 twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000));
1799
1800 /* SIGN(INARG)*2^63 IN SGL */
1801 twoto63 = packFloat32(xSign, 0xBE, 0);
1802
1803 fp2 = floatx80_mul(fp0, invtwopi, status);
1804 fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status),
1805 status); /* THE FRACT PART OF FP2 IS ROUNDED */
1806 fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status),
1807 status); /* FP2 is N */
1808 fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */
1809 fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */
1810 fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */
1811 fp4 = floatx80_sub(fp4, fp3, status); /* W-P */
1812 fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */
1813 fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */
1814 fp3 = fp0; /* FP3 is A */
1815 fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */
1816 fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */
1817
1818 if (endflag > 0) {
1819 n = floatx80_to_int32(fp2, status);
1820 goto sincont;
1821 }
1822 fp3 = floatx80_sub(fp3, fp0, status); /* A-R */
1823 fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */
1824 goto loop;
1825 } else {
1826 /* SINSM */
1827 fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */
1828
1829 status->float_rounding_mode = user_rnd_mode;
1830 status->floatx80_rounding_precision = user_rnd_prec;
1831
Laurent Vivier6361d292018-04-30 19:01:56 +02001832 /* COSTINY */
1833 a = floatx80_sub(fp0, float32_to_floatx80(
1834 make_float32(0x00800000), status),
1835 status);
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001836 float_raise(float_flag_inexact, status);
1837
1838 return a;
1839 }
1840 } else {
1841 fp1 = floatx80_mul(fp0, float64_to_floatx80(
1842 make_float64(0x3FE45F306DC9C883), status),
1843 status); /* X*2/PI */
1844
1845 n = floatx80_to_int32(fp1, status);
1846 j = 32 + n;
1847
1848 fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */
1849 fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status),
1850 status); /* FP0 IS R = (X-Y1)-Y2 */
1851
1852 sincont:
Laurent Vivier6361d292018-04-30 19:01:56 +02001853 if ((n + 1) & 1) {
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001854 /* COSPOLY */
1855 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1856 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1857 fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3),
1858 status); /* B8 */
1859 fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19),
1860 status); /* B7 */
1861
1862 xSign = extractFloatx80Sign(fp0); /* X IS S */
1863 xExp = extractFloatx80Exp(fp0);
1864 xSig = extractFloatx80Frac(fp0);
1865
Laurent Vivier6361d292018-04-30 19:01:56 +02001866 if (((n + 1) >> 1) & 1) {
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001867 xSign ^= 1;
1868 posneg1 = make_float32(0xBF800000); /* -1 */
1869 } else {
1870 xSign ^= 0;
1871 posneg1 = make_float32(0x3F800000); /* 1 */
1872 } /* X IS NOW R'= SGN*R */
1873
1874 fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */
1875 fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */
1876 fp2 = floatx80_add(fp2, float64_to_floatx80(
1877 make_float64(0x3E21EED90612C972), status),
1878 status); /* B6+TB8 */
1879 fp3 = floatx80_add(fp3, float64_to_floatx80(
1880 make_float64(0xBE927E4FB79D9FCF), status),
1881 status); /* B5+TB7 */
1882 fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */
1883 fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */
1884 fp2 = floatx80_add(fp2, float64_to_floatx80(
1885 make_float64(0x3EFA01A01A01D423), status),
1886 status); /* B4+T(B6+TB8) */
1887 fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438));
1888 fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */
1889 fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */
1890 fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */
1891 fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E));
1892 fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */
1893 fp1 = floatx80_add(fp1, float32_to_floatx80(
1894 make_float32(0xBF000000), status),
1895 status); /* B1+T(B3+T(B5+TB7)) */
1896 fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */
1897 fp0 = floatx80_add(fp0, fp1, status);
1898 /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */
1899
1900 x = packFloatx80(xSign, xExp, xSig);
1901 fp0 = floatx80_mul(fp0, x, status);
1902
1903 status->float_rounding_mode = user_rnd_mode;
1904 status->floatx80_rounding_precision = user_rnd_prec;
1905
1906 a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status);
1907
1908 float_raise(float_flag_inexact, status);
1909
1910 return a;
1911 } else {
1912 /* SINPOLY */
1913 xSign = extractFloatx80Sign(fp0); /* X IS R */
1914 xExp = extractFloatx80Exp(fp0);
1915 xSig = extractFloatx80Frac(fp0);
1916
Laurent Vivier6361d292018-04-30 19:01:56 +02001917 xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */
Laurent Vivier68d0ed32018-03-12 21:27:20 +01001918
1919 fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */
1920 fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */
1921 fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5),
1922 status); /* A7 */
1923 fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1),
1924 status); /* A6 */
1925 fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */
1926 fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */
1927 fp3 = floatx80_add(fp3, float64_to_floatx80(
1928 make_float64(0xBE5AE6452A118AE4), status),
1929 status); /* A5+T*A7 */
1930 fp2 = floatx80_add(fp2, float64_to_floatx80(
1931 make_float64(0x3EC71DE3A5341531), status),
1932 status); /* A4+T*A6 */
1933 fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */
1934 fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */
1935 fp3 = floatx80_add(fp3, float64_to_floatx80(
1936 make_float64(0xBF2A01A01A018B59), status),
1937 status); /* A3+T(A5+TA7) */
1938 fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF));
1939 fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */
1940 fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */
1941 fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */
1942 fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99));
1943 fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */
1944 fp1 = floatx80_add(fp1, fp2, status);
1945 /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */
1946
1947 x = packFloatx80(xSign, xExp, xSig);
1948 fp0 = floatx80_mul(fp0, x, status); /* R'*S */
1949 fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */
1950
1951 status->float_rounding_mode = user_rnd_mode;
1952 status->floatx80_rounding_precision = user_rnd_prec;
1953
1954 a = floatx80_add(fp0, x, status);
1955
1956 float_raise(float_flag_inexact, status);
1957
1958 return a;
1959 }
1960 }
1961}
Laurent Vivier8c992ab2018-03-12 21:27:22 +01001962
1963/*----------------------------------------------------------------------------
1964 | Arc tangent
1965 *----------------------------------------------------------------------------*/
1966
1967floatx80 floatx80_atan(floatx80 a, float_status *status)
1968{
1969 flag aSign;
1970 int32_t aExp;
1971 uint64_t aSig;
1972
1973 int8_t user_rnd_mode, user_rnd_prec;
1974
1975 int32_t compact, tbl_index;
1976 floatx80 fp0, fp1, fp2, fp3, xsave;
1977
1978 aSig = extractFloatx80Frac(a);
1979 aExp = extractFloatx80Exp(a);
1980 aSign = extractFloatx80Sign(a);
1981
1982 if (aExp == 0x7FFF) {
1983 if ((uint64_t) (aSig << 1)) {
1984 return propagateFloatx80NaNOneArg(a, status);
1985 }
1986 a = packFloatx80(aSign, piby2_exp, pi_sig);
1987 float_raise(float_flag_inexact, status);
1988 return floatx80_move(a, status);
1989 }
1990
1991 if (aExp == 0 && aSig == 0) {
1992 return packFloatx80(aSign, 0, 0);
1993 }
1994
1995 compact = floatx80_make_compact(aExp, aSig);
1996
1997 user_rnd_mode = status->float_rounding_mode;
1998 user_rnd_prec = status->floatx80_rounding_precision;
1999 status->float_rounding_mode = float_round_nearest_even;
2000 status->floatx80_rounding_precision = 80;
2001
2002 if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
2003 /* |X| >= 16 or |X| < 1/16 */
2004 if (compact > 0x3FFF8000) { /* |X| >= 16 */
2005 if (compact > 0x40638000) { /* |X| > 2^(100) */
2006 fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
2007 fp1 = packFloatx80(aSign, 0x0001, one_sig);
2008
2009 status->float_rounding_mode = user_rnd_mode;
2010 status->floatx80_rounding_precision = user_rnd_prec;
2011
2012 a = floatx80_sub(fp0, fp1, status);
2013
2014 float_raise(float_flag_inexact, status);
2015
2016 return a;
2017 } else {
2018 fp0 = a;
2019 fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
2020 fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
2021 xsave = fp1;
2022 fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
2023 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
2024 fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
2025 status); /* C5 */
2026 fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
2027 status); /* C4 */
2028 fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
2029 fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
2030 fp3 = floatx80_add(fp3, float64_to_floatx80(
2031 make_float64(0xBFC24924827107B8), status),
2032 status); /* C3+Z*C5 */
2033 fp2 = floatx80_add(fp2, float64_to_floatx80(
2034 make_float64(0x3FC999999996263E), status),
2035 status); /* C2+Z*C4 */
2036 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
2037 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
2038 fp1 = floatx80_add(fp1, float64_to_floatx80(
2039 make_float64(0xBFD5555555555536), status),
2040 status); /* C1+Z*(C3+Z*C5) */
2041 fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
2042 /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
2043 fp1 = floatx80_add(fp1, fp2, status);
2044 /* X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? */
2045 fp0 = floatx80_mul(fp0, fp1, status);
2046 fp0 = floatx80_add(fp0, xsave, status);
2047 fp1 = packFloatx80(aSign, piby2_exp, pi_sig);
2048
2049 status->float_rounding_mode = user_rnd_mode;
2050 status->floatx80_rounding_precision = user_rnd_prec;
2051
2052 a = floatx80_add(fp0, fp1, status);
2053
2054 float_raise(float_flag_inexact, status);
2055
2056 return a;
2057 }
2058 } else { /* |X| < 1/16 */
2059 if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
2060 status->float_rounding_mode = user_rnd_mode;
2061 status->floatx80_rounding_precision = user_rnd_prec;
2062
2063 a = floatx80_move(a, status);
2064
2065 float_raise(float_flag_inexact, status);
2066
2067 return a;
2068 } else {
2069 fp0 = a;
2070 xsave = a;
2071 fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
2072 fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
2073 fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
2074 status); /* B6 */
2075 fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
2076 status); /* B5 */
2077 fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
2078 fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
2079 fp2 = floatx80_add(fp2, float64_to_floatx80(
2080 make_float64(0x3FBC71C646940220), status),
2081 status); /* B4+Z*B6 */
2082 fp3 = floatx80_add(fp3, float64_to_floatx80(
2083 make_float64(0xBFC24924921872F9),
2084 status), status); /* B3+Z*B5 */
2085 fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
2086 fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
2087 fp2 = floatx80_add(fp2, float64_to_floatx80(
2088 make_float64(0x3FC9999999998FA9), status),
2089 status); /* B2+Z*(B4+Z*B6) */
2090 fp1 = floatx80_add(fp1, float64_to_floatx80(
2091 make_float64(0xBFD5555555555555), status),
2092 status); /* B1+Z*(B3+Z*B5) */
2093 fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
2094 fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
2095 /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
2096 fp1 = floatx80_add(fp1, fp2, status);
2097 /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
2098 fp0 = floatx80_mul(fp0, fp1, status);
2099
2100 status->float_rounding_mode = user_rnd_mode;
2101 status->floatx80_rounding_precision = user_rnd_prec;
2102
2103 a = floatx80_add(fp0, xsave, status);
2104
2105 float_raise(float_flag_inexact, status);
2106
2107 return a;
2108 }
2109 }
2110 } else {
2111 aSig &= LIT64(0xF800000000000000);
2112 aSig |= LIT64(0x0400000000000000);
2113 xsave = packFloatx80(aSign, aExp, aSig); /* F */
2114 fp0 = a;
2115 fp1 = a; /* X */
2116 fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
2117 fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
2118 fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
2119 fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
2120 fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */
2121
2122 tbl_index = compact;
2123
2124 tbl_index &= 0x7FFF0000;
2125 tbl_index -= 0x3FFB0000;
2126 tbl_index >>= 1;
2127 tbl_index += compact & 0x00007800;
2128 tbl_index >>= 11;
2129
2130 fp3 = atan_tbl[tbl_index];
2131
2132 fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */
2133
2134 fp1 = floatx80_mul(fp0, fp0, status); /* V = U*U */
2135 fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
2136 status); /* A3 */
2137 fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
2138 fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
2139 fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
2140 fp2 = floatx80_add(fp2, float64_to_floatx80(
2141 make_float64(0x4002AC6934A26DB3), status),
2142 status); /* A2+V*(A3+V) */
2143 fp1 = floatx80_mul(fp1, float64_to_floatx80(
2144 make_float64(0xBFC2476F4E1DA28E), status),
2145 status); /* A1+U*V */
2146 fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
2147 fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */
2148
2149 status->float_rounding_mode = user_rnd_mode;
2150 status->floatx80_rounding_precision = user_rnd_prec;
2151
2152 a = floatx80_add(fp0, fp3, status); /* ATAN(X) */
2153
2154 float_raise(float_flag_inexact, status);
2155
2156 return a;
2157 }
2158}
Laurent Vivierbc20b342018-03-12 21:27:23 +01002159
2160/*----------------------------------------------------------------------------
2161 | Arc sine
2162 *----------------------------------------------------------------------------*/
2163
2164floatx80 floatx80_asin(floatx80 a, float_status *status)
2165{
2166 flag aSign;
2167 int32_t aExp;
2168 uint64_t aSig;
2169
2170 int8_t user_rnd_mode, user_rnd_prec;
2171
2172 int32_t compact;
2173 floatx80 fp0, fp1, fp2, one;
2174
2175 aSig = extractFloatx80Frac(a);
2176 aExp = extractFloatx80Exp(a);
2177 aSign = extractFloatx80Sign(a);
2178
2179 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2180 return propagateFloatx80NaNOneArg(a, status);
2181 }
2182
2183 if (aExp == 0 && aSig == 0) {
2184 return packFloatx80(aSign, 0, 0);
2185 }
2186
2187 compact = floatx80_make_compact(aExp, aSig);
2188
2189 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2190 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2191 float_raise(float_flag_inexact, status);
2192 a = packFloatx80(aSign, piby2_exp, pi_sig);
2193 return floatx80_move(a, status);
2194 } else { /* |X| > 1 */
2195 float_raise(float_flag_invalid, status);
2196 return floatx80_default_nan(status);
2197 }
2198
2199 } /* |X| < 1 */
2200
2201 user_rnd_mode = status->float_rounding_mode;
2202 user_rnd_prec = status->floatx80_rounding_precision;
2203 status->float_rounding_mode = float_round_nearest_even;
2204 status->floatx80_rounding_precision = 80;
2205
2206 one = packFloatx80(0, one_exp, one_sig);
2207 fp0 = a;
2208
2209 fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
2210 fp2 = floatx80_add(one, fp0, status); /* 1 + X */
2211 fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
2212 fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
2213 fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */
2214
2215 status->float_rounding_mode = user_rnd_mode;
2216 status->floatx80_rounding_precision = user_rnd_prec;
2217
2218 a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */
2219
2220 float_raise(float_flag_inexact, status);
2221
2222 return a;
2223}
Laurent Vivierc84813b2018-03-12 21:27:24 +01002224
2225/*----------------------------------------------------------------------------
2226 | Arc cosine
2227 *----------------------------------------------------------------------------*/
2228
2229floatx80 floatx80_acos(floatx80 a, float_status *status)
2230{
2231 flag aSign;
2232 int32_t aExp;
2233 uint64_t aSig;
2234
2235 int8_t user_rnd_mode, user_rnd_prec;
2236
2237 int32_t compact;
2238 floatx80 fp0, fp1, one;
2239
2240 aSig = extractFloatx80Frac(a);
2241 aExp = extractFloatx80Exp(a);
2242 aSign = extractFloatx80Sign(a);
2243
2244 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2245 return propagateFloatx80NaNOneArg(a, status);
2246 }
2247 if (aExp == 0 && aSig == 0) {
2248 float_raise(float_flag_inexact, status);
2249 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2250 piby2_exp, pi_sig, 0, status);
2251 }
2252
2253 compact = floatx80_make_compact(aExp, aSig);
2254
2255 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2256 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2257 if (aSign) { /* X == -1 */
2258 a = packFloatx80(0, pi_exp, pi_sig);
2259 float_raise(float_flag_inexact, status);
2260 return floatx80_move(a, status);
2261 } else { /* X == +1 */
2262 return packFloatx80(0, 0, 0);
2263 }
2264 } else { /* |X| > 1 */
2265 float_raise(float_flag_invalid, status);
2266 return floatx80_default_nan(status);
2267 }
2268 } /* |X| < 1 */
2269
2270 user_rnd_mode = status->float_rounding_mode;
2271 user_rnd_prec = status->floatx80_rounding_precision;
2272 status->float_rounding_mode = float_round_nearest_even;
2273 status->floatx80_rounding_precision = 80;
2274
2275 one = packFloatx80(0, one_exp, one_sig);
2276 fp0 = a;
2277
2278 fp1 = floatx80_add(one, fp0, status); /* 1 + X */
2279 fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
2280 fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
2281 fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
2282 fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */
2283
2284 status->float_rounding_mode = user_rnd_mode;
2285 status->floatx80_rounding_precision = user_rnd_prec;
2286
2287 a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */
2288
2289 float_raise(float_flag_inexact, status);
2290
2291 return a;
2292}
Laurent Viviere3655af2018-03-12 21:27:25 +01002293
2294/*----------------------------------------------------------------------------
2295 | Hyperbolic arc tangent
2296 *----------------------------------------------------------------------------*/
2297
2298floatx80 floatx80_atanh(floatx80 a, float_status *status)
2299{
2300 flag aSign;
2301 int32_t aExp;
2302 uint64_t aSig;
2303
2304 int8_t user_rnd_mode, user_rnd_prec;
2305
2306 int32_t compact;
2307 floatx80 fp0, fp1, fp2, one;
2308
2309 aSig = extractFloatx80Frac(a);
2310 aExp = extractFloatx80Exp(a);
2311 aSign = extractFloatx80Sign(a);
2312
2313 if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
2314 return propagateFloatx80NaNOneArg(a, status);
2315 }
2316
2317 if (aExp == 0 && aSig == 0) {
2318 return packFloatx80(aSign, 0, 0);
2319 }
2320
2321 compact = floatx80_make_compact(aExp, aSig);
2322
2323 if (compact >= 0x3FFF8000) { /* |X| >= 1 */
2324 if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
2325 float_raise(float_flag_divbyzero, status);
2326 return packFloatx80(aSign, floatx80_infinity.high,
2327 floatx80_infinity.low);
2328 } else { /* |X| > 1 */
2329 float_raise(float_flag_invalid, status);
2330 return floatx80_default_nan(status);
2331 }
2332 } /* |X| < 1 */
2333
2334 user_rnd_mode = status->float_rounding_mode;
2335 user_rnd_prec = status->floatx80_rounding_precision;
2336 status->float_rounding_mode = float_round_nearest_even;
2337 status->floatx80_rounding_precision = 80;
2338
2339 one = packFloatx80(0, one_exp, one_sig);
2340 fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
2341 fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
2342 fp1 = packFloatx80(1, aExp, aSig); /* -Y */
2343 fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
2344 fp1 = floatx80_add(fp1, one, status); /* 1-Y */
2345 fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
2346 fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */
2347
2348 status->float_rounding_mode = user_rnd_mode;
2349 status->floatx80_rounding_precision = user_rnd_prec;
2350
2351 a = floatx80_mul(fp0, fp2,
2352 status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */
2353
2354 float_raise(float_flag_inexact, status);
2355
2356 return a;
2357}
Laurent Vivier9937b022018-03-12 21:27:26 +01002358
2359/*----------------------------------------------------------------------------
2360 | e to x minus 1
2361 *----------------------------------------------------------------------------*/
2362
2363floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
2364{
2365 flag aSign;
2366 int32_t aExp;
2367 uint64_t aSig;
2368
2369 int8_t user_rnd_mode, user_rnd_prec;
2370
2371 int32_t compact, n, j, m, m1;
2372 floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;
2373
2374 aSig = extractFloatx80Frac(a);
2375 aExp = extractFloatx80Exp(a);
2376 aSign = extractFloatx80Sign(a);
2377
2378 if (aExp == 0x7FFF) {
2379 if ((uint64_t) (aSig << 1)) {
2380 return propagateFloatx80NaNOneArg(a, status);
2381 }
2382 if (aSign) {
2383 return packFloatx80(aSign, one_exp, one_sig);
2384 }
2385 return packFloatx80(0, floatx80_infinity.high,
2386 floatx80_infinity.low);
2387 }
2388
2389 if (aExp == 0 && aSig == 0) {
2390 return packFloatx80(aSign, 0, 0);
2391 }
2392
2393 user_rnd_mode = status->float_rounding_mode;
2394 user_rnd_prec = status->floatx80_rounding_precision;
2395 status->float_rounding_mode = float_round_nearest_even;
2396 status->floatx80_rounding_precision = 80;
2397
2398 if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
2399 compact = floatx80_make_compact(aExp, aSig);
2400
2401 if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
2402 fp0 = a;
2403 fp1 = a;
2404 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2405 make_float32(0x42B8AA3B), status),
2406 status); /* 64/log2 * X */
2407 n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
2408 fp0 = int32_to_floatx80(n, status);
2409
2410 j = n & 0x3F; /* J = N mod 64 */
2411 m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
2412 if (n < 0 && j) {
2413 /* arithmetic right shift is division and
2414 * round towards minus infinity
2415 */
2416 m--;
2417 }
2418 m1 = -m;
2419 /*m += 0x3FFF; // biased exponent of 2^(M) */
2420 /*m1 += 0x3FFF; // biased exponent of -2^(-M) */
2421
2422 fp2 = fp0; /* N */
2423 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2424 make_float32(0xBC317218), status),
2425 status); /* N * L1, L1 = lead(-log2/64) */
2426 l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6));
2427 fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
2428 fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
2429 fp0 = floatx80_add(fp0, fp2, status); /* R */
2430
2431 fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
2432 fp2 = float32_to_floatx80(make_float32(0x3950097B),
2433 status); /* A6 */
2434 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
2435 fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
2436 status), fp1, status); /* fp3 is S*A5 */
2437 fp2 = floatx80_add(fp2, float64_to_floatx80(
2438 make_float64(0x3F81111111174385), status),
2439 status); /* fp2 IS A4+S*A6 */
2440 fp3 = floatx80_add(fp3, float64_to_floatx80(
2441 make_float64(0x3FA5555555554F5A), status),
2442 status); /* fp3 is A3+S*A5 */
2443 fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
2444 fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
2445 fp2 = floatx80_add(fp2, float64_to_floatx80(
2446 make_float64(0x3FC5555555555555), status),
2447 status); /* fp2 IS A2+S*(A4+S*A6) */
2448 fp3 = floatx80_add(fp3, float32_to_floatx80(
2449 make_float32(0x3F000000), status),
2450 status); /* fp3 IS A1+S*(A3+S*A5) */
2451 fp2 = floatx80_mul(fp2, fp1,
2452 status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
2453 fp1 = floatx80_mul(fp1, fp3,
2454 status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
2455 fp2 = floatx80_mul(fp2, fp0,
2456 status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
2457 fp0 = floatx80_add(fp0, fp1,
2458 status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
2459 fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */
2460
2461 fp0 = floatx80_mul(fp0, exp_tbl[j],
2462 status); /* 2^(J/64)*(Exp(R)-1) */
2463
2464 if (m >= 64) {
2465 fp1 = float32_to_floatx80(exp_tbl2[j], status);
2466 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2467 fp1 = floatx80_add(fp1, onebysc, status);
2468 fp0 = floatx80_add(fp0, fp1, status);
2469 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2470 } else if (m < -3) {
2471 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2472 status), status);
2473 fp0 = floatx80_add(fp0, exp_tbl[j], status);
2474 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2475 fp0 = floatx80_add(fp0, onebysc, status);
2476 } else { /* -3 <= m <= 63 */
2477 fp1 = exp_tbl[j];
2478 fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
2479 status), status);
2480 onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
2481 fp1 = floatx80_add(fp1, onebysc, status);
2482 fp0 = floatx80_add(fp0, fp1, status);
2483 }
2484
2485 sc = packFloatx80(0, m + 0x3FFF, one_sig);
2486
2487 status->float_rounding_mode = user_rnd_mode;
2488 status->floatx80_rounding_precision = user_rnd_prec;
2489
2490 a = floatx80_mul(fp0, sc, status);
2491
2492 float_raise(float_flag_inexact, status);
2493
2494 return a;
2495 } else { /* |X| > 70 log2 */
2496 if (aSign) {
2497 fp0 = float32_to_floatx80(make_float32(0xBF800000),
2498 status); /* -1 */
2499
2500 status->float_rounding_mode = user_rnd_mode;
2501 status->floatx80_rounding_precision = user_rnd_prec;
2502
2503 a = floatx80_add(fp0, float32_to_floatx80(
2504 make_float32(0x00800000), status),
2505 status); /* -1 + 2^(-126) */
2506
2507 float_raise(float_flag_inexact, status);
2508
2509 return a;
2510 } else {
2511 status->float_rounding_mode = user_rnd_mode;
2512 status->floatx80_rounding_precision = user_rnd_prec;
2513
2514 return floatx80_etox(a, status);
2515 }
2516 }
2517 } else { /* |X| < 1/4 */
2518 if (aExp >= 0x3FBE) {
2519 fp0 = a;
2520 fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
2521 fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
2522 status); /* B12 */
2523 fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
2524 fp2 = float32_to_floatx80(make_float32(0x310F8290),
2525 status); /* B11 */
2526 fp1 = floatx80_add(fp1, float32_to_floatx80(
2527 make_float32(0x32D73220), status),
2528 status); /* B10 */
2529 fp2 = floatx80_mul(fp2, fp0, status);
2530 fp1 = floatx80_mul(fp1, fp0, status);
2531 fp2 = floatx80_add(fp2, float32_to_floatx80(
2532 make_float32(0x3493F281), status),
2533 status); /* B9 */
2534 fp1 = floatx80_add(fp1, float64_to_floatx80(
2535 make_float64(0x3EC71DE3A5774682), status),
2536 status); /* B8 */
2537 fp2 = floatx80_mul(fp2, fp0, status);
2538 fp1 = floatx80_mul(fp1, fp0, status);
2539 fp2 = floatx80_add(fp2, float64_to_floatx80(
2540 make_float64(0x3EFA01A019D7CB68), status),
2541 status); /* B7 */
2542 fp1 = floatx80_add(fp1, float64_to_floatx80(
2543 make_float64(0x3F2A01A01A019DF3), status),
2544 status); /* B6 */
2545 fp2 = floatx80_mul(fp2, fp0, status);
2546 fp1 = floatx80_mul(fp1, fp0, status);
2547 fp2 = floatx80_add(fp2, float64_to_floatx80(
2548 make_float64(0x3F56C16C16C170E2), status),
2549 status); /* B5 */
2550 fp1 = floatx80_add(fp1, float64_to_floatx80(
2551 make_float64(0x3F81111111111111), status),
2552 status); /* B4 */
2553 fp2 = floatx80_mul(fp2, fp0, status);
2554 fp1 = floatx80_mul(fp1, fp0, status);
2555 fp2 = floatx80_add(fp2, float64_to_floatx80(
2556 make_float64(0x3FA5555555555555), status),
2557 status); /* B3 */
2558 fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB));
2559 fp1 = floatx80_add(fp1, fp3, status); /* B2 */
2560 fp2 = floatx80_mul(fp2, fp0, status);
2561 fp1 = floatx80_mul(fp1, fp0, status);
2562
2563 fp2 = floatx80_mul(fp2, fp0, status);
2564 fp1 = floatx80_mul(fp1, a, status);
2565
2566 fp0 = floatx80_mul(fp0, float32_to_floatx80(
2567 make_float32(0x3F000000), status),
2568 status); /* S*B1 */
2569 fp1 = floatx80_add(fp1, fp2, status); /* Q */
2570 fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */
2571
2572 status->float_rounding_mode = user_rnd_mode;
2573 status->floatx80_rounding_precision = user_rnd_prec;
2574
2575 a = floatx80_add(fp0, a, status);
2576
2577 float_raise(float_flag_inexact, status);
2578
2579 return a;
2580 } else { /* |X| < 2^(-65) */
2581 sc = packFloatx80(1, 1, one_sig);
2582 fp0 = a;
2583
2584 if (aExp < 0x0033) { /* |X| < 2^(-16382) */
2585 fp0 = floatx80_mul(fp0, float64_to_floatx80(
2586 make_float64(0x48B0000000000000), status),
2587 status);
2588 fp0 = floatx80_add(fp0, sc, status);
2589
2590 status->float_rounding_mode = user_rnd_mode;
2591 status->floatx80_rounding_precision = user_rnd_prec;
2592
2593 a = floatx80_mul(fp0, float64_to_floatx80(
2594 make_float64(0x3730000000000000), status),
2595 status);
2596 } else {
2597 status->float_rounding_mode = user_rnd_mode;
2598 status->floatx80_rounding_precision = user_rnd_prec;
2599
2600 a = floatx80_add(fp0, sc, status);
2601 }
2602
2603 float_raise(float_flag_inexact, status);
2604
2605 return a;
2606 }
2607 }
2608}
2609
2610/*----------------------------------------------------------------------------
2611 | Hyperbolic tangent
2612 *----------------------------------------------------------------------------*/
2613
2614floatx80 floatx80_tanh(floatx80 a, float_status *status)
2615{
2616 flag aSign, vSign;
2617 int32_t aExp, vExp;
2618 uint64_t aSig, vSig;
2619
2620 int8_t user_rnd_mode, user_rnd_prec;
2621
2622 int32_t compact;
2623 floatx80 fp0, fp1;
2624 uint32_t sign;
2625
2626 aSig = extractFloatx80Frac(a);
2627 aExp = extractFloatx80Exp(a);
2628 aSign = extractFloatx80Sign(a);
2629
2630 if (aExp == 0x7FFF) {
2631 if ((uint64_t) (aSig << 1)) {
2632 return propagateFloatx80NaNOneArg(a, status);
2633 }
2634 return packFloatx80(aSign, one_exp, one_sig);
2635 }
2636
2637 if (aExp == 0 && aSig == 0) {
2638 return packFloatx80(aSign, 0, 0);
2639 }
2640
2641 user_rnd_mode = status->float_rounding_mode;
2642 user_rnd_prec = status->floatx80_rounding_precision;
2643 status->float_rounding_mode = float_round_nearest_even;
2644 status->floatx80_rounding_precision = 80;
2645
2646 compact = floatx80_make_compact(aExp, aSig);
2647
2648 if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
2649 /* TANHBORS */
2650 if (compact < 0x3FFF8000) {
2651 /* TANHSM */
2652 status->float_rounding_mode = user_rnd_mode;
2653 status->floatx80_rounding_precision = user_rnd_prec;
2654
2655 a = floatx80_move(a, status);
2656
2657 float_raise(float_flag_inexact, status);
2658
2659 return a;
2660 } else {
2661 if (compact > 0x40048AA1) {
2662 /* TANHHUGE */
2663 sign = 0x3F800000;
2664 sign |= aSign ? 0x80000000 : 0x00000000;
2665 fp0 = float32_to_floatx80(make_float32(sign), status);
2666 sign &= 0x80000000;
2667 sign ^= 0x80800000; /* -SIGN(X)*EPS */
2668
2669 status->float_rounding_mode = user_rnd_mode;
2670 status->floatx80_rounding_precision = user_rnd_prec;
2671
2672 a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
2673 status), status);
2674
2675 float_raise(float_flag_inexact, status);
2676
2677 return a;
2678 } else {
2679 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2680 fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
2681 fp0 = floatx80_add(fp0, float32_to_floatx80(
2682 make_float32(0x3F800000),
2683 status), status); /* EXP(Y)+1 */
2684 sign = aSign ? 0x80000000 : 0x00000000;
2685 fp1 = floatx80_div(float32_to_floatx80(make_float32(
2686 sign ^ 0xC0000000), status), fp0,
2687 status); /* -SIGN(X)*2 / [EXP(Y)+1] */
2688 fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
2689 status); /* SIGN */
2690
2691 status->float_rounding_mode = user_rnd_mode;
2692 status->floatx80_rounding_precision = user_rnd_prec;
2693
2694 a = floatx80_add(fp1, fp0, status);
2695
2696 float_raise(float_flag_inexact, status);
2697
2698 return a;
2699 }
2700 }
2701 } else { /* 2**(-40) < |X| < (5/2)LOG2 */
2702 fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
2703 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2704 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
2705 status),
2706 status); /* Z+2 */
2707
2708 vSign = extractFloatx80Sign(fp1);
2709 vExp = extractFloatx80Exp(fp1);
2710 vSig = extractFloatx80Frac(fp1);
2711
2712 fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);
2713
2714 status->float_rounding_mode = user_rnd_mode;
2715 status->floatx80_rounding_precision = user_rnd_prec;
2716
2717 a = floatx80_div(fp0, fp1, status);
2718
2719 float_raise(float_flag_inexact, status);
2720
2721 return a;
2722 }
2723}
Laurent Viviereee6b892018-03-12 21:27:27 +01002724
2725/*----------------------------------------------------------------------------
2726 | Hyperbolic sine
2727 *----------------------------------------------------------------------------*/
2728
2729floatx80 floatx80_sinh(floatx80 a, float_status *status)
2730{
2731 flag aSign;
2732 int32_t aExp;
2733 uint64_t aSig;
2734
2735 int8_t user_rnd_mode, user_rnd_prec;
2736
2737 int32_t compact;
2738 floatx80 fp0, fp1, fp2;
2739 float32 fact;
2740
2741 aSig = extractFloatx80Frac(a);
2742 aExp = extractFloatx80Exp(a);
2743 aSign = extractFloatx80Sign(a);
2744
2745 if (aExp == 0x7FFF) {
2746 if ((uint64_t) (aSig << 1)) {
2747 return propagateFloatx80NaNOneArg(a, status);
2748 }
2749 return packFloatx80(aSign, floatx80_infinity.high,
2750 floatx80_infinity.low);
2751 }
2752
2753 if (aExp == 0 && aSig == 0) {
2754 return packFloatx80(aSign, 0, 0);
2755 }
2756
2757 user_rnd_mode = status->float_rounding_mode;
2758 user_rnd_prec = status->floatx80_rounding_precision;
2759 status->float_rounding_mode = float_round_nearest_even;
2760 status->floatx80_rounding_precision = 80;
2761
2762 compact = floatx80_make_compact(aExp, aSig);
2763
2764 if (compact > 0x400CB167) {
2765 /* SINHBIG */
2766 if (compact > 0x400CB2B3) {
2767 status->float_rounding_mode = user_rnd_mode;
2768 status->floatx80_rounding_precision = user_rnd_prec;
2769
2770 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2771 aSign, 0x8000, aSig, 0, status);
2772 } else {
2773 fp0 = floatx80_abs(a); /* Y = |X| */
2774 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2775 make_float64(0x40C62D38D3D64634), status),
2776 status); /* (|X|-16381LOG2_LEAD) */
2777 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2778 make_float64(0x3D6F90AEB1E75CC7), status),
2779 status); /* |X| - 16381 LOG2, ACCURATE */
2780 fp0 = floatx80_etox(fp0, status);
2781 fp2 = packFloatx80(aSign, 0x7FFB, one_sig);
2782
2783 status->float_rounding_mode = user_rnd_mode;
2784 status->floatx80_rounding_precision = user_rnd_prec;
2785
2786 a = floatx80_mul(fp0, fp2, status);
2787
2788 float_raise(float_flag_inexact, status);
2789
2790 return a;
2791 }
2792 } else { /* |X| < 16380 LOG2 */
2793 fp0 = floatx80_abs(a); /* Y = |X| */
2794 fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
2795 fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
2796 status), status); /* 1+Z */
2797 fp2 = fp0;
2798 fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
2799 fp0 = floatx80_add(fp0, fp2, status);
2800
2801 fact = packFloat32(aSign, 0x7E, 0);
2802
2803 status->float_rounding_mode = user_rnd_mode;
2804 status->floatx80_rounding_precision = user_rnd_prec;
2805
2806 a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);
2807
2808 float_raise(float_flag_inexact, status);
2809
2810 return a;
2811 }
2812}
Laurent Vivier02f91242018-03-12 21:27:28 +01002813
2814/*----------------------------------------------------------------------------
2815 | Hyperbolic cosine
2816 *----------------------------------------------------------------------------*/
2817
2818floatx80 floatx80_cosh(floatx80 a, float_status *status)
2819{
2820 int32_t aExp;
2821 uint64_t aSig;
2822
2823 int8_t user_rnd_mode, user_rnd_prec;
2824
2825 int32_t compact;
2826 floatx80 fp0, fp1;
2827
2828 aSig = extractFloatx80Frac(a);
2829 aExp = extractFloatx80Exp(a);
2830
2831 if (aExp == 0x7FFF) {
2832 if ((uint64_t) (aSig << 1)) {
2833 return propagateFloatx80NaNOneArg(a, status);
2834 }
2835 return packFloatx80(0, floatx80_infinity.high,
2836 floatx80_infinity.low);
2837 }
2838
2839 if (aExp == 0 && aSig == 0) {
2840 return packFloatx80(0, one_exp, one_sig);
2841 }
2842
2843 user_rnd_mode = status->float_rounding_mode;
2844 user_rnd_prec = status->floatx80_rounding_precision;
2845 status->float_rounding_mode = float_round_nearest_even;
2846 status->floatx80_rounding_precision = 80;
2847
2848 compact = floatx80_make_compact(aExp, aSig);
2849
2850 if (compact > 0x400CB167) {
2851 if (compact > 0x400CB2B3) {
2852 status->float_rounding_mode = user_rnd_mode;
2853 status->floatx80_rounding_precision = user_rnd_prec;
2854 return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
2855 0x8000, one_sig, 0, status);
2856 } else {
2857 fp0 = packFloatx80(0, aExp, aSig);
2858 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2859 make_float64(0x40C62D38D3D64634), status),
2860 status);
2861 fp0 = floatx80_sub(fp0, float64_to_floatx80(
2862 make_float64(0x3D6F90AEB1E75CC7), status),
2863 status);
2864 fp0 = floatx80_etox(fp0, status);
2865 fp1 = packFloatx80(0, 0x7FFB, one_sig);
2866
2867 status->float_rounding_mode = user_rnd_mode;
2868 status->floatx80_rounding_precision = user_rnd_prec;
2869
2870 a = floatx80_mul(fp0, fp1, status);
2871
2872 float_raise(float_flag_inexact, status);
2873
2874 return a;
2875 }
2876 }
2877
2878 fp0 = packFloatx80(0, aExp, aSig); /* |X| */
2879 fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
2880 fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
2881 status), status); /* (1/2)*EXP(|X|) */
2882 fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
2883 fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */
2884
2885 status->float_rounding_mode = user_rnd_mode;
2886 status->floatx80_rounding_precision = user_rnd_prec;
2887
2888 a = floatx80_add(fp0, fp1, status);
2889
2890 float_raise(float_flag_inexact, status);
2891
2892 return a;
2893}