Merge pull request #21 from nibrunieAtSi5/bf16-support [#18] Adding minimal BFloat16 support

commit: eb498c55bab0f49c3903f69524e2674abe71c26b [log] [tgz]
author: Jerry Zhao <jerryz123@berkeley.edu> Fri Oct 13 15:23:50 2023 -0700
committer: GitHub <noreply@github.com> Fri Oct 13 15:23:50 2023 -0700
tree: be448123945308a8bc01e831adb0c83b640aa979
parent: 5c06db33fc1e2130f67c045327b0ec949032df1d [diff]
parent: bef5a34e1b2ef862e2c5cc0ed4f1ab6e92d01af1 [diff]
diff --git a/build/Linux-x86_64-GCC/Makefile b/build/Linux-x86_64-GCC/Makefile
index 2ee5dad..72f251a 100644
--- a/build/Linux-x86_64-GCC/Makefile
+++ b/build/Linux-x86_64-GCC/Makefile

@@ -94,6 +94,8 @@
   s_f16UIToCommonNaN$(OBJ) \
   s_commonNaNToF16UI$(OBJ) \
   s_propagateNaNF16UI$(OBJ) \
+  s_bf16UIToCommonNaN$(OBJ) \
+  s_commonNaNToBF16UI$(OBJ) \
   s_f32UIToCommonNaN$(OBJ) \
   s_commonNaNToF32UI$(OBJ) \
   s_propagateNaNF32UI$(OBJ) \
@@ -114,6 +116,8 @@
   s_roundToUI64$(OBJ) \
   s_roundToI32$(OBJ) \
   s_roundToI64$(OBJ) \
+  s_normSubnormalBF16Sig$(OBJ) \
+  s_roundPackToBF16$(OBJ) \
   s_normSubnormalF16Sig$(OBJ) \
   s_roundPackToF16$(OBJ) \
   s_normRoundPackToF16$(OBJ) \
@@ -172,6 +176,8 @@
   i64_to_extF80M$(OBJ) \
   i64_to_f128$(OBJ) \
   i64_to_f128M$(OBJ) \
+  bf16_isSignalingNaN$(OBJ) \
+  bf16_to_f32$(OBJ) \
   f16_to_ui32$(OBJ) \
   f16_to_ui64$(OBJ) \
   f16_to_i32$(OBJ) \
@@ -209,6 +215,7 @@
   f32_to_ui64_r_minMag$(OBJ) \
   f32_to_i32_r_minMag$(OBJ) \
   f32_to_i64_r_minMag$(OBJ) \
+  f32_to_bf16$(OBJ) \
   f32_to_f16$(OBJ) \
   f32_to_f64$(OBJ) \
   f32_to_extF80$(OBJ) \

diff --git a/build/template-FAST_INT64/Makefile b/build/template-FAST_INT64/Makefile
index 78e7ff5..e04c216 100644
--- a/build/template-FAST_INT64/Makefile
+++ b/build/template-FAST_INT64/Makefile

@@ -115,6 +115,8 @@
   s_roundToUI64$(OBJ) \
   s_roundToI32$(OBJ) \
   s_roundToI64$(OBJ) \
+  s_normSubnormalBF16Sig$(OBJ) \
+  s_roundPackToBF16$(OBJ) \
   s_normSubnormalF16Sig$(OBJ) \
   s_roundPackToF16$(OBJ) \
   s_normRoundPackToF16$(OBJ) \
@@ -173,6 +175,8 @@
   i64_to_extF80M$(OBJ) \
   i64_to_f128$(OBJ) \
   i64_to_f128M$(OBJ) \
+  bf16_isSignalingNaN$(OBJ) \
+  bf16_to_f32$(OBJ) \
   f16_to_ui32$(OBJ) \
   f16_to_ui64$(OBJ) \
   f16_to_i32$(OBJ) \
@@ -210,6 +214,7 @@
   f32_to_ui64_r_minMag$(OBJ) \
   f32_to_i32_r_minMag$(OBJ) \
   f32_to_i64_r_minMag$(OBJ) \
+  f32_to_bf16$(OBJ) \
   f32_to_f16$(OBJ) \
   f32_to_f64$(OBJ) \
   f32_to_extF80$(OBJ) \

diff --git a/source/8086-SSE/s_bf16UIToCommonNaN.c b/source/8086-SSE/s_bf16UIToCommonNaN.c
new file mode 100644
index 0000000..c1c774d
--- /dev/null
+++ b/source/8086-SSE/s_bf16UIToCommonNaN.c

@@ -0,0 +1,59 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Assuming `uiA' has the bit pattern of a BF16 NaN, converts
+| this NaN to the common NaN form, and stores the resulting common NaN at the
+| location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
+{
+
+    if ( softfloat_isSigNaNBF16UI( uiA ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+    }
+    zPtr->sign = uiA>>15;
+    zPtr->v64  = (uint_fast64_t) uiA<<56;
+    zPtr->v0   = 0;
+
+}
+

diff --git a/source/8086-SSE/s_commonNaNToBF16UI.c b/source/8086-SSE/s_commonNaNToBF16UI.c
new file mode 100644
index 0000000..d81cf51
--- /dev/null
+++ b/source/8086-SSE/s_commonNaNToBF16UI.c

@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by `aPtr' into a BF16 NaN, and 
+| returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr )
+{
+
+    return (uint_fast16_t) aPtr->sign<<15 | 0x7FC0 | aPtr->v64>>56;
+
+}
+

diff --git a/source/8086-SSE/specialize.h b/source/8086-SSE/specialize.h
index a9166e1..8ed2e75 100644
--- a/source/8086-SSE/specialize.h
+++ b/source/8086-SSE/specialize.h

@@ -118,6 +118,27 @@
  softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
 
 /*----------------------------------------------------------------------------
+| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
+| 16-bit brain floating-point (BF16) signaling NaN.
+| Note:  This macro evaluates its argument more than once.
+*----------------------------------------------------------------------------*/
+#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
+
+/*----------------------------------------------------------------------------
+| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
+| this NaN to the common NaN form, and stores the resulting common NaN at the
+| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
+| NaN, and returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
+
+/*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNF32UI 0xFFC00000

diff --git a/source/RISCV/s_bf16UIToCommonNaN.c b/source/RISCV/s_bf16UIToCommonNaN.c
new file mode 100644
index 0000000..861b269
--- /dev/null
+++ b/source/RISCV/s_bf16UIToCommonNaN.c

@@ -0,0 +1,5 @@
+
+/*----------------------------------------------------------------------------
+| This file intentionally contains no code.
+*----------------------------------------------------------------------------*/
+

diff --git a/source/RISCV/s_commonNaNToBF16UI.c b/source/RISCV/s_commonNaNToBF16UI.c
new file mode 100644
index 0000000..861b269
--- /dev/null
+++ b/source/RISCV/s_commonNaNToBF16UI.c

@@ -0,0 +1,5 @@
+
+/*----------------------------------------------------------------------------
+| This file intentionally contains no code.
+*----------------------------------------------------------------------------*/
+

diff --git a/source/RISCV/specialize.h b/source/RISCV/specialize.h
index c638264..cb95900 100644
--- a/source/RISCV/specialize.h
+++ b/source/RISCV/specialize.h

@@ -88,6 +88,13 @@
 #define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
 
 /*----------------------------------------------------------------------------
+| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
+| 16-bit brain floating-point (BF16) signaling NaN.
+| Note:  This macro evaluates its argument more than once.
+*----------------------------------------------------------------------------*/
+#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
+
+/*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
@@ -96,6 +103,14 @@
 #define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid )
 
 /*----------------------------------------------------------------------------
+| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
+| this NaN to the common NaN form, and stores the resulting common NaN at the
+| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0040) ) softfloat_raiseFlags( softfloat_flag_invalid )
+
+/*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
@@ -111,6 +126,17 @@
  softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
 
 /*----------------------------------------------------------------------------
+| The bit pattern for a default generated 16-bit BF16 floating-point NaN.
+*----------------------------------------------------------------------------*/
+#define defaultNaNBF16UI 0x7FC0
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
+| NaN, and returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
+
+/*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNF32UI 0x7FC00000

diff --git a/source/bf16_isSignalingNaN.c b/source/bf16_isSignalingNaN.c
new file mode 100644
index 0000000..79ca87f
--- /dev/null
+++ b/source/bf16_isSignalingNaN.c

@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool bf16_isSignalingNaN( bfloat16_t a )
+{
+    union ui16_bf16 uA;
+
+    uA.f = a;
+    return softfloat_isSigNaNBF16UI( uA.ui );
+
+}
+

diff --git a/source/bf16_to_f32.c b/source/bf16_to_f32.c
new file mode 100644
index 0000000..b86482c
--- /dev/null
+++ b/source/bf16_to_f32.c

@@ -0,0 +1,90 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float32_t bf16_to_f32( bfloat16_t a )
+{
+    union ui16_bf16 uA;
+    uint_fast16_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast16_t frac;
+    struct commonNaN commonNaN;
+    uint_fast32_t uiZ;
+    struct exp8_sig16 normExpSig;
+    union ui32_f32 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signBF16UI( uiA );
+    exp  = expBF16UI( uiA );
+    frac = fracBF16UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // NaN or Inf
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+        } else {
+            uiZ = packToF32UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // packToF32UI simply packs bitfields without any numerical change
+    // which means it can be used directly for any BF16 to f32 conversions which
+    // does not require bits manipulation
+    // (that is everything where the 16-bit are just padded right with 16 zeros, including
+    //  subnormal numbers)
+    uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+
+

diff --git a/source/f32_to_bf16.c b/source/f32_to_bf16.c
new file mode 100644
index 0000000..6f81493
--- /dev/null
+++ b/source/f32_to_bf16.c

@@ -0,0 +1,105 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+
+bfloat16_t f32_to_bf16( float32_t a )
+{
+    union ui32_f32 uA;
+    uint_fast32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ, frac16;
+    union ui16_bf16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp  = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // infinity or NaN cases
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            // NaN case
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToBF16UI( &commonNaN );
+        } else {
+            // infinity case
+            uiZ = packToBF16UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // frac is a 24-bit mantissa, right shifted by 9
+    // In the normal case, (24-9) = 15 are set 
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
+    if ( ! (exp | frac16) ) {
+        uiZ = packToBF16UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // softfloat_roundPackToBF16 exponent argument (2nd argument)
+    // must correspond to the exponent of fracIn[13] bits
+    // (fracIn is the 3rd and last argument) 
+    uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number
+    // exponent for the lowest normal and largest subnormal should be equal
+    // but is not in IEEE encoding so mantissa must be partially normalized
+    // (by one bit) for subnormal numbers. Such that (exp - 1) corresponds
+    // to the exponent of frac16[13]
+    frac16 = frac16 << (exp ? 0 : 1);
+    return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+

diff --git a/source/f32_to_f16.c b/source/f32_to_f16.c
index a00b2e8..45005fe 100644
--- a/source/f32_to_f16.c
+++ b/source/f32_to_f16.c

@@ -72,6 +72,9 @@
     }
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
+    // frac is a 24-bit significand, the bottom 9 bits LSB are extracted and OR-red
+    // into a sticky flag, the top 15 MSBs are extracted, the LSB of this top slice
+    // is OR-red with the sticky 
     frac16 = frac>>9 | ((frac & 0x1FF) != 0);
     if ( ! (exp | frac16) ) {
         uiZ = packToF16UI( sign, 0, 0 );

diff --git a/source/include/internals.h b/source/include/internals.h
index f8eac05..5e6d4bd 100644
--- a/source/include/internals.h
+++ b/source/include/internals.h

@@ -43,6 +43,7 @@
 #include "softfloat_types.h"
 
 union ui16_f16 { uint16_t ui; float16_t f; };
+union ui16_bf16 { uint16_t ui; bfloat16_t f; };
 union ui32_f32 { uint32_t ui; float32_t f; };
 union ui64_f64 { uint64_t ui; float64_t f; };
 
@@ -101,6 +102,18 @@
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
+#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
+#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
+#define fracBF16UI( a ) ((a) & 0x07F)
+#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
+
+#define isNaNBF16UI( a ) (((~(a) & 0x7FC0) == 0) && ((a) & 0x07F))
+
+bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
+struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t );
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
 #define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
 #define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
 #define fracF32UI( a ) ((a) & 0x007FFFFF)

diff --git a/source/include/softfloat.h b/source/include/softfloat.h
index 9ed17c1..c1757b8 100644
--- a/source/include/softfloat.h
+++ b/source/include/softfloat.h

@@ -170,6 +170,13 @@
 bool f16_isSignalingNaN( float16_t );
 
 /*----------------------------------------------------------------------------
+| 16-bit (brain float 16) floating-point operations.
+*----------------------------------------------------------------------------*/
+float32_t bf16_to_f32( bfloat16_t );
+bfloat16_t f32_to_bf16( float32_t );
+bool bf16_isSignalingNaN( bfloat16_t );
+
+/*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
 uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );

diff --git a/source/include/softfloat_types.h b/source/include/softfloat_types.h
index b92d246..25039b5 100644
--- a/source/include/softfloat_types.h
+++ b/source/include/softfloat_types.h

@@ -48,6 +48,7 @@
 | (typically 'float' and 'double', and possibly 'long double').
 *----------------------------------------------------------------------------*/
 typedef struct { uint16_t v; } float16_t;
+typedef struct { uint16_t v; } bfloat16_t;
 typedef struct { uint32_t v; } float32_t;
 typedef struct { uint64_t v; } float64_t;
 typedef struct { uint64_t v[2]; } float128_t;

diff --git a/source/s_normSubnormalBF16Sig.c b/source/s_normSubnormalBF16Sig.c
new file mode 100644
index 0000000..b81baa9
--- /dev/null
+++ b/source/s_normSubnormalBF16Sig.c

@@ -0,0 +1,52 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig )
+{
+    int_fast8_t shiftDist;
+    struct exp8_sig16 z;
+
+    shiftDist = softfloat_countLeadingZeros16( sig ) - 8;
+    z.exp = 1 - shiftDist;
+    z.sig = sig<<shiftDist;
+    return z;
+
+}
+

diff --git a/source/s_roundPackToBF16.c b/source/s_roundPackToBF16.c
new file mode 100644
index 0000000..57a7ae4
--- /dev/null
+++ b/source/s_roundPackToBF16.c

@@ -0,0 +1,114 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/** sig last significant bit is sig[7], the 7 LSBs will be used for rounding */
+bfloat16_t
+ softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint_fast8_t roundIncrement, roundBits;
+    bool isTiny;
+    uint_fast16_t uiZ;
+    union ui16_bf16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    roundIncrement = 0x40;
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        roundIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                ? 0x7F
+                : 0;
+    }
+    roundBits = sig & 0x7F;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0xFD <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
+                    || (exp < -1) || (sig + roundIncrement < 0x8000);
+            sig = softfloat_shiftRightJam32( sig, -exp );
+            exp = 0;
+            roundBits = sig & 0x7F;
+            if ( isTiny && roundBits ) {
+                softfloat_raiseFlags( softfloat_flag_underflow );
+            }
+        } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
+            goto uiZ;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig = (sig + roundIncrement)>>7;
+    if ( roundBits ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig |= 1;
+            goto packReturn;
+        }
+#endif
+    }
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
+    if ( ! sig ) exp = 0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ packReturn:
+    uiZ = packToBF16UI( sign, exp, sig );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
commit	eb498c55bab0f49c3903f69524e2674abe71c26b	[log] [tgz]
author	Jerry Zhao <jerryz123@berkeley.edu>	Fri Oct 13 15:23:50 2023 -0700
committer	GitHub <noreply@github.com>	Fri Oct 13 15:23:50 2023 -0700
tree	be448123945308a8bc01e831adb0c83b640aa979
parent	5c06db33fc1e2130f67c045327b0ec949032df1d [diff]
parent	bef5a34e1b2ef862e2c5cc0ed4f1ab6e92d01af1 [diff]