Browse Source

Merge pull request #1902 from chihminchao/extend-bf16

Extend bf16
pull/1896/head
Andrew Waterman 1 year ago
committed by GitHub
parent
commit
bfb67c1954
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 36
      softfloat/bf16_classify.c
  2. 65
      softfloat/bf16_cmp.c
  3. 48
      softfloat/bf16_to_i32.c
  4. 57
      softfloat/bf16_to_i8.c
  5. 48
      softfloat/bf16_to_ui32.c
  6. 54
      softfloat/bf16_to_ui8.c
  7. 0
      softfloat/f128_classify.c
  8. 0
      softfloat/f16_classify.c
  9. 0
      softfloat/f32_classify.c
  10. 0
      softfloat/f64_classify.c
  11. 16
      softfloat/fall_maxmin.c
  12. 80
      softfloat/fall_reciprocal.c
  13. 63
      softfloat/fall_sign.c
  14. 52
      softfloat/i32_to_bf16.c
  15. 15
      softfloat/internals.h
  16. 18
      softfloat/softfloat.h
  17. 9
      softfloat/softfloat.mk.in
  18. 7
      softfloat/specialize.h
  19. 51
      softfloat/ui32_to_bf16.c

36
softfloat/bf16_classify.c

@ -0,0 +1,36 @@
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Classifies the bfloat16 value `a' and returns a mask with one bit set:
|   bit 0: negative infinity     bit 5: positive subnormal
|   bit 1: negative normal       bit 6: positive normal
|   bit 2: negative subnormal    bit 7: positive infinity
|   bit 3: negative zero         bit 8: signaling NaN
|   bit 4: positive zero         bit 9: quiet NaN
| No exception flags are touched by this routine.
*----------------------------------------------------------------------------*/
uint_fast16_t bf16_classify( bfloat16_t a )
{
    /* Use the bfloat16 union (not the float16 one) to reach the raw bits. */
    union ui16_bf16 uA;
    uint_fast16_t uiA;

    uA.f = a;
    uiA = uA.ui;

    /* An all-ones exponent means infinity or NaN; an all-zeros exponent
       means zero or subnormal.  The fraction then tells the two apart. */
    bool infOrNaN = expBF16UI( uiA ) == 0xFF;
    bool subnormalOrZero = expBF16UI( uiA ) == 0;
    bool sign = signBF16UI( uiA );
    bool fracZero = fracBF16UI( uiA ) == 0;
    bool isNaN = isNaNBF16UI( uiA );
    bool isSNaN = softfloat_isSigNaNBF16UI( uiA );

    return
        (  sign &&  infOrNaN &&  fracZero )                << 0 |
        (  sign && !infOrNaN && !subnormalOrZero )         << 1 |
        (  sign &&  subnormalOrZero && !fracZero )         << 2 |
        (  sign &&  subnormalOrZero &&  fracZero )         << 3 |
        ( !sign &&  infOrNaN &&  fracZero )                << 7 |
        ( !sign && !infOrNaN && !subnormalOrZero )         << 6 |
        ( !sign &&  subnormalOrZero && !fracZero )         << 5 |
        ( !sign &&  subnormalOrZero &&  fracZero )         << 4 |
        ( isNaN &&  isSNaN )                               << 8 |
        ( isNaN && !isSNaN )                               << 9;
}

65
softfloat/bf16_cmp.c

@ -0,0 +1,65 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Returns the result of f32_eq applied to `a' and `b' after each 16-bit
| pattern has been moved into the upper half of a 32-bit float pattern
| (lower 16 bits zero).
*----------------------------------------------------------------------------*/
bool bf16_eq( bfloat16_t a, bfloat16_t b )
{
    const float32_t lhs = { .v = (uint_fast32_t)a.v << 16 };
    const float32_t rhs = { .v = (uint_fast32_t)b.v << 16 };
    const bool isEqual = f32_eq( lhs, rhs );

    return isEqual;
}
/*----------------------------------------------------------------------------
| Returns the result of f32_le applied to `a' and `b' after each 16-bit
| pattern has been moved into the upper half of a 32-bit float pattern
| (lower 16 bits zero).
*----------------------------------------------------------------------------*/
bool bf16_le( bfloat16_t a, bfloat16_t b )
{
    const float32_t lhs = { .v = (uint_fast32_t)a.v << 16 };
    const float32_t rhs = { .v = (uint_fast32_t)b.v << 16 };
    const bool isLessOrEqual = f32_le( lhs, rhs );

    return isLessOrEqual;
}
/*----------------------------------------------------------------------------
| Returns the result of f32_lt applied to `a' and `b' after each 16-bit
| pattern has been moved into the upper half of a 32-bit float pattern
| (lower 16 bits zero).
*----------------------------------------------------------------------------*/
bool bf16_lt( bfloat16_t a, bfloat16_t b )
{
    const float32_t lhs = { .v = (uint_fast32_t)a.v << 16 };
    const float32_t rhs = { .v = (uint_fast32_t)b.v << 16 };
    const bool isLess = f32_lt( lhs, rhs );

    return isLess;
}

48
softfloat/bf16_to_i32.c

@ -0,0 +1,48 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Converts the bfloat16 value `a' to a signed 32-bit integer in two steps:
| bf16_to_f32 widens the operand, then f32_to_i32 performs the conversion
| with the caller's `roundingMode' and `exact' arguments.
*----------------------------------------------------------------------------*/
int_fast32_t bf16_to_i32( bfloat16_t a, uint_fast8_t roundingMode, bool exact )
{
    const float32_t widened = bf16_to_f32( a );

    return f32_to_i32( widened, roundingMode, exact );
}

57
softfloat/bf16_to_i8.c

@ -0,0 +1,57 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Converts the bfloat16 value `a' to a signed 8-bit integer by way of
| bf16_to_i32.  If the 32-bit result lies outside [INT8_MIN, INT8_MAX], the
| exception flags are set to their value on entry plus the invalid flag
| (flags raised by the 32-bit conversion are dropped) and the matching
| i8 overflow constant is returned.
*----------------------------------------------------------------------------*/
int_fast8_t bf16_to_i8( bfloat16_t a, uint_fast8_t roundingMode, bool exact )
{
    const uint_fast8_t flagsOnEntry = softfloat_exceptionFlags;
    const int_fast32_t wide = bf16_to_i32( a, roundingMode, exact );

    if ( wide > INT8_MAX ) {
        softfloat_exceptionFlags = flagsOnEntry | softfloat_flag_invalid;
        return i8_fromPosOverflow;
    }
    if ( wide < INT8_MIN ) {
        softfloat_exceptionFlags = flagsOnEntry | softfloat_flag_invalid;
        return i8_fromNegOverflow;
    }

    /* In range: keep whatever flags the 32-bit conversion raised. */
    return wide;
}

48
softfloat/bf16_to_ui32.c

@ -0,0 +1,48 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
uint_fast32_t bf16_to_ui32( float16_t a, uint_fast8_t roundingMode, bool exact )
{
return f32_to_ui32(bf16_to_f32(a), roundingMode, exact);
}

54
softfloat/bf16_to_ui8.c

@ -0,0 +1,54 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Converts the bfloat16 value `a' to an unsigned 8-bit integer by way of
| bf16_to_ui32.  If the 32-bit result exceeds UINT8_MAX, the exception flags
| are set to their value on entry plus the invalid flag (flags raised by the
| 32-bit conversion are dropped) and ui8_fromPosOverflow is returned.
*----------------------------------------------------------------------------*/
uint_fast8_t bf16_to_ui8( bfloat16_t a, uint_fast8_t roundingMode, bool exact )
{
    const uint_fast8_t flagsOnEntry = softfloat_exceptionFlags;
    const uint_fast32_t wide = bf16_to_ui32( a, roundingMode, exact );

    if ( wide <= UINT8_MAX ) {
        /* In range: keep whatever flags the 32-bit conversion raised. */
        return wide;
    }

    softfloat_exceptionFlags = flagsOnEntry | softfloat_flag_invalid;
    return ui8_fromPosOverflow;
}

0
softfloat/f128_classify.c

0
softfloat/f16_classify.c

0
softfloat/f32_classify.c

0
softfloat/f64_classify.c

16
softfloat/fall_maxmin.c

@ -79,3 +79,19 @@ COMPARE_MAX(a, b, 64);
COMPARE_MIN(a, b, 16); COMPARE_MIN(a, b, 16);
COMPARE_MIN(a, b, 32); COMPARE_MIN(a, b, 32);
COMPARE_MIN(a, b, 64); COMPARE_MIN(a, b, 64);
/*
 * Maximum of two bfloat16 values.  Each 16-bit pattern is placed in the
 * upper half of a 32-bit float pattern, f32_max picks the result, and
 * f32_to_bf16 converts it back to bfloat16.
 */
bfloat16_t bf16_max( bfloat16_t a, bfloat16_t b )
{
    const float32_t lhs = { .v = (uint32_t)a.v << 16 };
    const float32_t rhs = { .v = (uint32_t)b.v << 16 };
    const float32_t larger = f32_max( lhs, rhs );

    return f32_to_bf16( larger );
}
/*
 * Minimum of two bfloat16 values.  Each 16-bit pattern is placed in the
 * upper half of a 32-bit float pattern, f32_min picks the result, and
 * f32_to_bf16 converts it back to bfloat16.
 */
bfloat16_t bf16_min( bfloat16_t a, bfloat16_t b )
{
    const float32_t lhs = { .v = (uint32_t)a.v << 16 };
    const float32_t rhs = { .v = (uint32_t)b.v << 16 };
    const float32_t smaller = f32_min( lhs, rhs );

    return f32_to_bf16( smaller );
}

80
softfloat/fall_reciprocal.c

@ -93,6 +93,43 @@ static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
return (sign << (s+e)) | (out_exp << s) | out_sig; return (sign << (s+e)) | (out_exp << s) | out_sig;
} }
// Reciprocal square-root estimate for bfloat16.
// The operand is classified with bf16_classify and special cases are
// answered directly; everything else is handed to rsqrte7() with an
// 8-bit exponent and 7-bit significand layout.
//   -inf, -normal, -subnormal, sNaN : default NaN, invalid flag raised
//   qNaN                            : default NaN, no flag
//   -0 / +0                         : -inf / +inf, infinite flag raised
//   +inf                            : +0
bfloat16_t bf16_rsqrte7(bfloat16_t in)
{
    union ui16_bf16 bits;
    bits.f = in;

    const unsigned int cls = bf16_classify(in);

    switch (cls) {
    case 0x200: // qNaN
        bits.ui = defaultNaNBF16UI;
        break;

    case 0x100: // sNaN
    case 0x001: // -inf
    case 0x002: // -normal
    case 0x004: // -subnormal
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNBF16UI;
        break;

    case 0x008: // -0
    case 0x010: // +0
        bits.ui = (cls == 0x008) ? 0xff80 : 0x7f80;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        break;

    case 0x080: // +inf
        bits.ui = 0x0;
        break;

    case 0x020: // +subnormal
        bits.ui = rsqrte7(bits.ui, 8, 7, true);
        break;

    default: // +normal
        bits.ui = rsqrte7(bits.ui, 8, 7, false);
        break;
    }

    return bits.f;
}
float16_t f16_rsqrte7(float16_t in) float16_t f16_rsqrte7(float16_t in)
{ {
union ui16_f16 uA; union ui16_f16 uA;
@ -262,6 +299,49 @@ static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
return (sign << (s+e)) | (out_exp << s) | out_sig; return (sign << (s+e)) | (out_exp << s) | out_sig;
} }
// Reciprocal estimate for bfloat16.
// The operand is classified with bf16_classify and special cases are
// answered directly; everything else is handed to recip7() with an
// 8-bit exponent and 7-bit significand layout and the current
// softfloat_roundingMode.
//   sNaN        : default NaN, invalid flag raised
//   qNaN        : default NaN, no flag
//   -0 / +0     : -inf / +inf, infinite flag raised
//   -inf / +inf : -0 / +0
// When recip7() reports an abnormal rounding through its out-parameter,
// the inexact and overflow flags are raised.
bfloat16_t bf16_recip7(bfloat16_t in)
{
    union ui16_bf16 bits;
    bits.f = in;

    const unsigned int cls = bf16_classify(in);

    switch (cls) {
    case 0x100: // sNaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        bits.ui = defaultNaNBF16UI;
        break;

    case 0x200: // qNaN
        bits.ui = defaultNaNBF16UI;
        break;

    case 0x008: // -0
    case 0x010: // +0
        bits.ui = (cls == 0x008) ? 0xff80 : 0x7f80;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
        break;

    case 0x001: // -inf
        bits.ui = 0x8000;
        break;

    case 0x080: // +inf
        bits.ui = 0x0;
        break;

    default: { // +-normal and +-subnormal
        const bool subnormal = (cls == 0x004) || (cls == 0x020);
        bool abnormalRound = false;

        bits.ui = recip7(bits.ui, 8, 7,
                         softfloat_roundingMode, subnormal, &abnormalRound);
        if (abnormalRound) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
        break;
    }
    }

    return bits.f;
}
float16_t f16_recip7(float16_t in) float16_t f16_recip7(float16_t in)
{ {
union ui16_f16 uA; union ui16_f16 uA;

63
softfloat/fall_sign.c

@ -0,0 +1,63 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Returns true when the sign bit (bit 15) of the bfloat16 value `a' is set.
*----------------------------------------------------------------------------*/
bool bf16_sign( bfloat16_t a )
{
    const bool signBit = signBF16UI( a.v );

    return signBit;
}
/*----------------------------------------------------------------------------
| Returns true when the sign bit (bit 15) of the 16-bit float `a' is set.
*----------------------------------------------------------------------------*/
bool f16_sign( float16_t a )
{
    const bool signBit = signF16UI( a.v );

    return signBit;
}
/*----------------------------------------------------------------------------
| Returns the sign of the 32-bit float `a' as extracted by signF32UI.
*----------------------------------------------------------------------------*/
bool f32_sign( float32_t a )
{
    const bool signBit = signF32UI( a.v );

    return signBit;
}
/*----------------------------------------------------------------------------
| Returns the sign of the 64-bit float `a' as extracted by signF64UI.
*----------------------------------------------------------------------------*/
bool f64_sign( float64_t a )
{
    const bool signBit = signF64UI( a.v );

    return signBit;
}

52
softfloat/i32_to_bf16.c

@ -0,0 +1,52 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Converts the signed 32-bit integer `a' to bfloat16 through an intermediate
| 32-bit float.  The integer-to-f32 step runs with the rounding mode forced
| to softfloat_round_odd; the caller's rounding mode is then restored and
| applies to the final f32-to-bf16 step.
| NOTE(review): rounding the intermediate to odd presumably avoids a
| double-rounding error in the final narrowing step -- confirm.
*----------------------------------------------------------------------------*/
bfloat16_t i32_to_bf16( int32_t a )
{
    const uint_fast8_t savedMode = softfloat_roundingMode;
    float32_t wide;

    softfloat_roundingMode = softfloat_round_odd;
    wide = i32_to_f32( a );
    softfloat_roundingMode = savedMode;

    return f32_to_bf16( wide );
}

15
softfloat/internals.h

@ -46,6 +46,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" { extern "C" {
#endif #endif
union ui16_bf16 { uint16_t ui; bfloat16_t f; };
union ui16_f16 { uint16_t ui; float16_t f; }; union ui16_f16 { uint16_t ui; float16_t f; };
union ui32_f32 { uint32_t ui; float32_t f; }; union ui32_f32 { uint32_t ui; float32_t f; };
union ui64_f64 { uint64_t ui; float64_t f; }; union ui64_f64 { uint64_t ui; float64_t f; };
@ -84,16 +85,20 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)
#define fracF16UI( a ) ((a) & 0x03FF)
#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15)) #define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF) #define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F) #define fracBF16UI( a ) ((a) & 0x07F)
#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig)) #define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
#define isNaNBF16UI( a ) (((~(a) & 0x7F80) == 0) && ((a) & 0x007F))
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)
#define fracF16UI( a ) ((a) & 0x03FF)
#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF)) #define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; }; struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };

18
softfloat/softfloat.h

@ -103,6 +103,7 @@ void softfloat_raiseFlags( uint_fast8_t );
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
| Integer-to-floating-point conversion routines. | Integer-to-floating-point conversion routines.
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
bfloat16_t ui32_to_bf16( uint32_t );
float16_t ui32_to_f16( uint32_t ); float16_t ui32_to_f16( uint32_t );
float32_t ui32_to_f32( uint32_t ); float32_t ui32_to_f32( uint32_t );
float64_t ui32_to_f64( uint32_t ); float64_t ui32_to_f64( uint32_t );
@ -121,6 +122,7 @@ float128_t ui64_to_f128( uint64_t );
#endif #endif
void ui64_to_extF80M( uint64_t, extFloat80_t * ); void ui64_to_extF80M( uint64_t, extFloat80_t * );
void ui64_to_f128M( uint64_t, float128_t * ); void ui64_to_f128M( uint64_t, float128_t * );
bfloat16_t i32_to_bf16( int32_t );
float16_t i32_to_f16( int32_t ); float16_t i32_to_f16( int32_t );
float32_t i32_to_f32( int32_t ); float32_t i32_to_f32( int32_t );
float64_t i32_to_f64( int32_t ); float64_t i32_to_f64( int32_t );
@ -180,6 +182,7 @@ bool f16_eq_signaling( float16_t, float16_t );
bool f16_le_quiet( float16_t, float16_t ); bool f16_le_quiet( float16_t, float16_t );
bool f16_lt_quiet( float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t );
bool f16_isSignalingNaN( float16_t ); bool f16_isSignalingNaN( float16_t );
bool f16_sign( float16_t );
uint_fast16_t f16_classify( float16_t ); uint_fast16_t f16_classify( float16_t );
float16_t f16_rsqrte7( float16_t ); float16_t f16_rsqrte7( float16_t );
float16_t f16_recip7( float16_t ); float16_t f16_recip7( float16_t );
@ -187,6 +190,10 @@ float16_t f16_recip7( float16_t );
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
| BFloat16 operations. | BFloat16 operations.
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
uint_fast8_t bf16_to_ui8( bfloat16_t, uint_fast8_t, bool );
uint_fast32_t bf16_to_ui32( bfloat16_t, uint_fast8_t, bool );
int_fast8_t bf16_to_i8( bfloat16_t, uint_fast8_t, bool );
int_fast32_t bf16_to_i32( bfloat16_t, uint_fast8_t, bool );
float32_t bf16_to_f32( bfloat16_t ); float32_t bf16_to_f32( bfloat16_t );
float64_t bf16_to_f64( bfloat16_t ); float64_t bf16_to_f64( bfloat16_t );
bfloat16_t bf16_add( bfloat16_t, bfloat16_t ); bfloat16_t bf16_add( bfloat16_t, bfloat16_t );
@ -195,6 +202,15 @@ bfloat16_t bf16_mul( bfloat16_t, bfloat16_t );
bfloat16_t bf16_mulAdd( bfloat16_t, bfloat16_t, bfloat16_t ); bfloat16_t bf16_mulAdd( bfloat16_t, bfloat16_t, bfloat16_t );
bfloat16_t bf16_div( bfloat16_t, bfloat16_t ); bfloat16_t bf16_div( bfloat16_t, bfloat16_t );
bfloat16_t bf16_sqrt( bfloat16_t ); bfloat16_t bf16_sqrt( bfloat16_t );
bfloat16_t bf16_max( bfloat16_t, bfloat16_t );
bfloat16_t bf16_min( bfloat16_t, bfloat16_t );
bool bf16_eq( bfloat16_t, bfloat16_t );
bool bf16_le( bfloat16_t, bfloat16_t );
bool bf16_lt( bfloat16_t, bfloat16_t );
bool bf16_sign( bfloat16_t );
uint_fast16_t bf16_classify( bfloat16_t );
bfloat16_t bf16_rsqrte7( bfloat16_t );
bfloat16_t bf16_recip7( bfloat16_t );
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations. | 32-bit (single-precision) floating-point operations.
@ -235,6 +251,7 @@ bool f32_eq_signaling( float32_t, float32_t );
bool f32_le_quiet( float32_t, float32_t ); bool f32_le_quiet( float32_t, float32_t );
bool f32_lt_quiet( float32_t, float32_t ); bool f32_lt_quiet( float32_t, float32_t );
bool f32_isSignalingNaN( float32_t ); bool f32_isSignalingNaN( float32_t );
bool f32_sign( float32_t );
uint_fast16_t f32_classify( float32_t ); uint_fast16_t f32_classify( float32_t );
float32_t f32_rsqrte7( float32_t ); float32_t f32_rsqrte7( float32_t );
float32_t f32_recip7( float32_t ); float32_t f32_recip7( float32_t );
@ -276,6 +293,7 @@ bool f64_eq_signaling( float64_t, float64_t );
bool f64_le_quiet( float64_t, float64_t ); bool f64_le_quiet( float64_t, float64_t );
bool f64_lt_quiet( float64_t, float64_t ); bool f64_lt_quiet( float64_t, float64_t );
bool f64_isSignalingNaN( float64_t ); bool f64_isSignalingNaN( float64_t );
bool f64_sign( float64_t );
uint_fast16_t f64_classify( float64_t ); uint_fast16_t f64_classify( float64_t );
float64_t f64_rsqrte7( float64_t ); float64_t f64_rsqrte7( float64_t );
float64_t f64_recip7( float64_t ); float64_t f64_recip7( float64_t );

9
softfloat/softfloat.mk.in

@ -7,8 +7,14 @@ softfloat_c_srcs = \
bf16_mulAdd.c \ bf16_mulAdd.c \
bf16_sqrt.c \ bf16_sqrt.c \
bf16_sub.c \ bf16_sub.c \
bf16_cmp.c \
bf16_classify.c \
bf16_to_f32.c \ bf16_to_f32.c \
bf16_to_f64.c \ bf16_to_f64.c \
bf16_to_i8.c \
bf16_to_i32.c \
bf16_to_ui8.c \
bf16_to_ui32.c \
f128_add.c \ f128_add.c \
f128_classify.c \ f128_classify.c \
f128_div.c \ f128_div.c \
@ -127,7 +133,9 @@ softfloat_c_srcs = \
f64_to_ui64_r_minMag.c \ f64_to_ui64_r_minMag.c \
fall_maxmin.c \ fall_maxmin.c \
fall_reciprocal.c \ fall_reciprocal.c \
fall_sign.c \
i32_to_f128.c \ i32_to_f128.c \
i32_to_bf16.c \
i32_to_f16.c \ i32_to_f16.c \
i32_to_f32.c \ i32_to_f32.c \
i32_to_f64.c \ i32_to_f64.c \
@ -228,6 +236,7 @@ softfloat_c_srcs = \
s_subMagsF64.c \ s_subMagsF64.c \
s_subM.c \ s_subM.c \
ui32_to_f128.c \ ui32_to_f128.c \
ui32_to_bf16.c \
ui32_to_f16.c \ ui32_to_f16.c \
ui32_to_f32.c \ ui32_to_f32.c \
ui32_to_f64.c \ ui32_to_f64.c \

7
softfloat/specialize.h

@ -103,6 +103,13 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
#define defaultNaNBF16UI 0x7FC0 #define defaultNaNBF16UI 0x7FC0
/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
| 16-bit brain floating-point (bfloat16) signaling NaN.
| Note: This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x007F))
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a | Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
| 16-bit floating-point signaling NaN. | 16-bit floating-point signaling NaN.

51
softfloat/ui32_to_bf16.c

@ -0,0 +1,51 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California. All Rights Reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Converts the unsigned 32-bit integer `a' to bfloat16 through an
| intermediate 32-bit float.  The integer-to-f32 step runs with the rounding
| mode forced to softfloat_round_odd; the caller's rounding mode is then
| restored and applies to the final f32-to-bf16 step.
| NOTE(review): rounding the intermediate to odd presumably avoids a
| double-rounding error in the final narrowing step -- confirm.
*----------------------------------------------------------------------------*/
bfloat16_t ui32_to_bf16( uint32_t a )
{
    const uint_fast8_t savedMode = softfloat_roundingMode;
    float32_t wide;

    softfloat_roundingMode = softfloat_round_odd;
    wide = ui32_to_f32( a );
    softfloat_roundingMode = savedMode;

    return f32_to_bf16( wide );
}
Loading…
Cancel
Save