// File: neorv32/sw/floating_point_test/neorv32_zfinx_extension_intrinsics.h (Doxygen source listing)

// #################################################################################################

// # << NEORV32 - Intrinsics + Emulation Functions for the RISC-V "Zfinx" CPU extension >>         #

// # ********************************************************************************************* #

// # The intrinsics provided by this library allow using the hardware floating-point unit of the   #

// # RISC-V Zfinx CPU extension without the need for Zfinx support by the compiler / toolchain.    #

// # ********************************************************************************************* #

// # BSD 3-Clause License                                                                          #

// #                                                                                               #

// # Copyright (c) 2024, Stephan Nolting. All rights reserved.                                     #

// #                                                                                               #

// # Redistribution and use in source and binary forms, with or without modification, are          #

// # permitted provided that the following conditions are met:                                     #

// #                                                                                               #

// # 1. Redistributions of source code must retain the above copyright notice, this list of        #

// #    conditions and the following disclaimer.                                                   #

// #                                                                                               #

// # 2. Redistributions in binary form must reproduce the above copyright notice, this list of     #

// #    conditions and the following disclaimer in the documentation and/or other materials        #

// #    provided with the distribution.                                                            #

// #                                                                                               #

// # 3. Neither the name of the copyright holder nor the names of its contributors may be used to  #

// #    endorse or promote products derived from this software without specific prior written      #

// #    permission.                                                                                #

// #                                                                                               #

// # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS   #

// # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF               #

// # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE    #

// # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,     #

// # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE #

// # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED    #

// # AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING     #

// # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED  #

// # OF THE POSSIBILITY OF SUCH DAMAGE.                                                            #

// # ********************************************************************************************* #

// # The NEORV32 Processor - https://github.com/stnolting/neorv32              (c) Stephan Nolting #

// #################################################################################################


/**********************************************************************/


#ifndef neorv32_zfinx_extension_intrinsics_h

#define neorv32_zfinx_extension_intrinsics_h


#define __USE_GNU

#define _GNU_SOURCE

#include <float.h>
#include <math.h>
#include <stdint.h>


/**********************************************************************/

#if defined __riscv_f || (__riscv_flen == 32)

  #error Application programs using the Zfinx intrinsic library have to be compiled WITHOUT the <F> MARCH ISA attribute!

#endif


/**********************************************************************/

/* Overlay used throughout this file to view one value either as a raw
 * unsigned word or as a float (type punning through a union). */
typedef union

{

  uint32_t binary_value; // storage viewed as an unsigned 32-bit integer

  float    float_value; // the same storage viewed as a float

} float_conv_t;


// ################################################################################################

// Helper functions

// ################################################################################################


/**********************************************************************/


/**
 * Flush a subnormal value to a zero of the same sign.
 *
 * @param tmp Value to check.
 * @return tmp unchanged unless fpclassify() reports FP_SUBNORMAL; in that
 *         case -0.0f for a set sign bit and +0.0f otherwise.
 */
float subnormal_flush(float tmp) {

  // everything that is not subnormal passes through untouched
  if (fpclassify(tmp) != FP_SUBNORMAL) {
    return tmp;
  }

  // subnormal: keep only the sign
  return (signbit(tmp) != 0) ? -0.0f : +0.0f;
}


// ################################################################################################

// "Intrinsics"

// ################################################################################################


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fadds(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fsubs(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000100, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fmuls(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0001000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fmins(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0010100, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fmaxs(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0010100, opb.binary_value, opa.binary_value, 0b001, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_wus(float rs1) {


  float_conv_t opa;

  opa.float_value = rs1;


  return CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00001, opa.binary_value, 0b000, 0b1010011);

}


/**********************************************************************/


inline int32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_ws(float rs1) {


  float_conv_t opa;

  opa.float_value = rs1;


  return (int32_t)CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00000, opa.binary_value, 0b000, 0b1010011);

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_swu(uint32_t rs1) {


  float_conv_t res;


  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00001, rs1, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_sw(int32_t rs1) {


  float_conv_t res;


  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00000, rs1, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_feqs(float rs1, float rs2) {


  float_conv_t opa, opb;

  opa.float_value = rs1;

  opb.float_value = rs2;


  return CUSTOM_INSTR_R3_TYPE(0b1010000, opb.binary_value, opa.binary_value, 0b010, 0b1010011);

}


/**********************************************************************/


inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_flts(float rs1, float rs2) {


  float_conv_t opa, opb;

  opa.float_value = rs1;

  opb.float_value = rs2;


  return CUSTOM_INSTR_R3_TYPE(0b1010000, opb.binary_value, opa.binary_value, 0b001, 0b1010011);

}


/**********************************************************************/


inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fles(float rs1, float rs2) {


  float_conv_t opa, opb;

  opa.float_value = rs1;

  opb.float_value = rs2;


  return CUSTOM_INSTR_R3_TYPE(0b1010000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjs(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0010000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjns(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0010000, opb.binary_value, opa.binary_value, 0b001, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fsgnjxs(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0010000, opb.binary_value, opa.binary_value, 0b010, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fclasss(float rs1) {


  float_conv_t opa;

  opa.float_value = rs1;


  return CUSTOM_INSTR_R2_TYPE(0b1110000, 0b00000, opa.binary_value, 0b001, 0b1010011);

}


// ################################################################################################

// !!! UNSUPPORTED instructions !!!

// ################################################################################################


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fdivs(float rs1, float rs2) {


  float_conv_t opa, opb, res;

  opa.float_value = rs1;

  opb.float_value = rs2;


  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0001100, opb.binary_value, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fsqrts(float rs1) {


  float_conv_t opa, res;

  opa.float_value = rs1;


  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b0101100, 0b00000, opa.binary_value, 0b000, 0b1010011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fmadds(float rs1, float rs2, float rs3) {


  float_conv_t opa, opb, opc, res;

  opa.float_value = rs1;

  opb.float_value = rs2;

  opc.float_value = rs3;


  res.binary_value = CUSTOM_INSTR_R4_TYPE(opc.binary_value, opb.binary_value, opa.binary_value, 0b000, 0b1000011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fmsubs(float rs1, float rs2, float rs3) {


  float_conv_t opa, opb, opc, res;

  opa.float_value = rs1;

  opb.float_value = rs2;

  opc.float_value = rs3;


  res.binary_value = CUSTOM_INSTR_R4_TYPE(opc.binary_value, opb.binary_value, opa.binary_value, 0b000, 0b1000111);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fnmsubs(float rs1, float rs2, float rs3) {


  float_conv_t opa, opb, opc, res;

  opa.float_value = rs1;

  opb.float_value = rs2;

  opc.float_value = rs3;


  res.binary_value = CUSTOM_INSTR_R4_TYPE(opc.binary_value, opb.binary_value, opa.binary_value, 0b000, 0b1001011);

  return res.float_value;

}


/**********************************************************************/


inline float __attribute__ ((always_inline)) riscv_intrinsic_fnmadds(float rs1, float rs2, float rs3) {


  float_conv_t opa, opb, opc, res;

  opa.float_value = rs1;

  opb.float_value = rs2;

  opc.float_value = rs3;


  res.binary_value = CUSTOM_INSTR_R4_TYPE(opc.binary_value, opb.binary_value, opa.binary_value, 0b000, 0b1001111);

  return res.float_value;

}


// ################################################################################################

// Emulation functions

// ################################################################################################


/**********************************************************************/


/**
 * Software emulation of single-precision addition.
 *
 * Subnormal inputs and a subnormal result are flushed to zero via
 * subnormal_flush(); a NaN result is replaced by the NAN constant.
 *
 * @param rs1 First summand.
 * @param rs2 Second summand.
 * @return rs1 + rs2 after flushing and NaN replacement.
 */
float __attribute__ ((noinline)) riscv_emulate_fadds(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);
  const float sum = lhs + rhs;

  // any NaN result is replaced by the canonical NAN constant
  const float canonical = (fpclassify(sum) == FP_NAN) ? NAN : sum;

  return subnormal_flush(canonical);
}


/**********************************************************************/


/**
 * Software emulation of single-precision subtraction.
 *
 * Subnormal inputs and a subnormal result are flushed to zero via
 * subnormal_flush(); a NaN result is replaced by the NAN constant.
 *
 * @param rs1 Minuend.
 * @param rs2 Subtrahend.
 * @return rs1 - rs2 after flushing and NaN replacement.
 */
float __attribute__ ((noinline)) riscv_emulate_fsubs(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);
  const float difference = lhs - rhs;

  // any NaN result is replaced by the canonical NAN constant
  const float canonical = (fpclassify(difference) == FP_NAN) ? NAN : difference;

  return subnormal_flush(canonical);
}


/**********************************************************************/


/**
 * Software emulation of single-precision multiplication.
 *
 * Subnormal inputs and a subnormal result are flushed to zero via
 * subnormal_flush(). A NaN result is replaced by the NAN constant, exactly
 * as riscv_emulate_fadds() and riscv_emulate_fsubs() do, so that a NaN
 * payload or sign propagated by the host arithmetic never leaks into the
 * reference value (RISC-V requires the canonical NaN as result).
 *
 * @param rs1 First factor.
 * @param rs2 Second factor.
 * @return rs1 * rs2 after flushing and NaN canonicalization.
 */
float __attribute__ ((noinline)) riscv_emulate_fmuls(float rs1, float rs2) {

  float opa = subnormal_flush(rs1);
  float opb = subnormal_flush(rs2);

  float res = opa * opb;

  // make NAN canonical
  if (fpclassify(res) == FP_NAN) {
    res = NAN;
  }

  return subnormal_flush(res);
}


/**********************************************************************/


/**
 * Software emulation of the single-precision minimum.
 *
 * Inputs are flushed via subnormal_flush() first. If both operands are NaN
 * the result is nanf(""); if exactly one is NaN the other operand is
 * returned. A (+0, -0) pair in either order yields -0.0f.
 *
 * @param rs1 First operand.
 * @param rs2 Second operand.
 * @return The smaller operand under the rules above.
 */
float __attribute__ ((noinline)) riscv_emulate_fmins(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  const int lhs_is_nan = (fpclassify(lhs) == FP_NAN);
  const int rhs_is_nan = (fpclassify(rhs) == FP_NAN);

  // NaN handling: two NaNs -> NaN, a single NaN -> the other operand
  if (lhs_is_nan) {
    return rhs_is_nan ? nanf("") : rhs;
  }
  if (rhs_is_nan) {
    return lhs;
  }

  // RISC-V spec: -0 < +0 (both operands are zero, but with opposite signs)
  if ((lhs == 0.0f) && (rhs == 0.0f) && ((signbit(lhs) != 0) != (signbit(rhs) != 0))) {
    return -0.0f;
  }

  return fminf(lhs, rhs);
}


/**********************************************************************/


/**
 * Software emulation of the single-precision maximum.
 *
 * Inputs are flushed via subnormal_flush() first. If both operands are NaN
 * the result is nanf(""); if exactly one is NaN the other operand is
 * returned. A (+0, -0) pair in either order yields +0.0f.
 *
 * @param rs1 First operand.
 * @param rs2 Second operand.
 * @return The larger operand under the rules above.
 */
float __attribute__ ((noinline)) riscv_emulate_fmaxs(float rs1, float rs2) {

  const float lhs = subnormal_flush(rs1);
  const float rhs = subnormal_flush(rs2);

  const int lhs_is_nan = (fpclassify(lhs) == FP_NAN);
  const int rhs_is_nan = (fpclassify(rhs) == FP_NAN);

  // NaN handling: two NaNs -> NaN, a single NaN -> the other operand
  if (lhs_is_nan) {
    return rhs_is_nan ? nanf("") : rhs;
  }
  if (rhs_is_nan) {
    return lhs;
  }

  // RISC-V spec: -0 < +0 (both operands are zero, but with opposite signs)
  if ((lhs == 0.0f) && (rhs == 0.0f) && ((signbit(lhs) != 0) != (signbit(rhs) != 0))) {
    return +0.0f;
  }

  return fmaxf(lhs, rhs);
}


/**********************************************************************/


/**
 * Software emulation of the float to unsigned 32-bit integer conversion.
 *
 * The input is flushed via subnormal_flush() and rounded with rint()
 * (current rounding mode). Casting a NaN or an out-of-range value to
 * uint32_t is undefined behavior in C, so those cases are handled
 * explicitly with the saturation the RISC-V spec defines for FCVT.WU.S:
 * NaN and values that are too large give UINT32_MAX, negative values give 0.
 *
 * @param rs1 Value to convert.
 * @return Rounded value, saturated to the uint32_t range.
 */
uint32_t __attribute__ ((noinline)) riscv_emulate_fcvt_wus(float rs1) {

  const float opa = subnormal_flush(rs1);

  // NaN converts to the largest representable value
  if (fpclassify(opa) == FP_NAN) {
    return UINT32_MAX;
  }

  const double rounded = rint(opa);

  // negative input (including -inf) saturates to 0; also covers -0.0
  if (rounded <= 0.0) {
    return 0;
  }

  // 2^32 and above (including +inf) saturates to the maximum
  if (rounded >= 4294967296.0) {
    return UINT32_MAX;
  }

  return (uint32_t)rounded;
}


/**********************************************************************/


/**
 * Software emulation of the float to signed 32-bit integer conversion.
 *
 * The input is flushed via subnormal_flush() and rounded with rint()
 * (current rounding mode). Casting a NaN or an out-of-range value to
 * int32_t is undefined behavior in C, so those cases are handled explicitly
 * with the saturation the RISC-V spec defines for FCVT.W.S: NaN and values
 * that are too large give INT32_MAX, values that are too small give
 * INT32_MIN.
 *
 * @param rs1 Value to convert.
 * @return Rounded value, saturated to the int32_t range.
 */
int32_t __attribute__ ((noinline)) riscv_emulate_fcvt_ws(float rs1) {

  const float opa = subnormal_flush(rs1);

  // NaN converts to the largest representable value
  if (fpclassify(opa) == FP_NAN) {
    return INT32_MAX;
  }

  const double rounded = rint(opa);

  // 2^31 and above (including +inf) saturates to the maximum
  if (rounded >= 2147483648.0) {
    return INT32_MAX;
  }

  // below -2^31 (including -inf) saturates to the minimum
  if (rounded < -2147483648.0) {
    return INT32_MIN;
  }

  return (int32_t)rounded;
}


/**********************************************************************/


/**
 * Software emulation of the unsigned 32-bit integer to float conversion.
 *
 * @param rs1 Integer to convert.
 * @return rs1 converted to float with a plain C cast.
 */
float __attribute__ ((noinline)) riscv_emulate_fcvt_swu(uint32_t rs1) {

  const float converted = (float)rs1;
  return converted;
}


/**********************************************************************/


/**
 * Software emulation of the signed 32-bit integer to float conversion.
 *
 * @param rs1 Integer to convert.
 * @return rs1 converted to float with a plain C cast.
 */
float __attribute__ ((noinline)) riscv_emulate_fcvt_sw(int32_t rs1) {

  const float converted = (float)rs1;
  return converted;
}


/**********************************************************************/


/**
 * Software emulation of the single-precision "equal" comparison.
 *
 * Inputs are flushed via subnormal_flush() first. Any NaN operand makes the
 * comparison false.
 *
 * The comparison macros are wrapped in explicit parentheses: an `if`
 * condition must be parenthesized by the caller and must not rely on the
 * C library's macro expansion happening to supply the parentheses.
 *
 * @param rs1 First operand.
 * @param rs2 Second operand.
 * @return 1 if the operands compare equal, 0 otherwise.
 */
uint32_t __attribute__ ((noinline)) riscv_emulate_feqs(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  // comparisons involving NaN are never true
  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  // neither less nor greater -> equal
  if ((isless(opa, opb)) || (isgreater(opa, opb))) {
    return 0;
  }

  return 1;
}


/**********************************************************************/


/**
 * Software emulation of the single-precision "less than" comparison.
 *
 * Inputs are flushed via subnormal_flush() first. Any NaN operand makes the
 * comparison false.
 *
 * The comparison macro is wrapped in explicit parentheses: an `if`
 * condition must be parenthesized by the caller and must not rely on the
 * C library's macro expansion happening to supply the parentheses.
 *
 * @param rs1 First operand.
 * @param rs2 Second operand.
 * @return 1 if rs1 < rs2, 0 otherwise.
 */
uint32_t __attribute__ ((noinline)) riscv_emulate_flts(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  // comparisons involving NaN are never true
  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  if (isless(opa, opb)) {
    return 1;
  }

  return 0;
}


/**********************************************************************/


/**
 * Software emulation of the single-precision "less than or equal" comparison.
 *
 * Inputs are flushed via subnormal_flush() first. Any NaN operand makes the
 * comparison false.
 *
 * The comparison macro is wrapped in explicit parentheses: an `if`
 * condition must be parenthesized by the caller and must not rely on the
 * C library's macro expansion happening to supply the parentheses.
 *
 * @param rs1 First operand.
 * @param rs2 Second operand.
 * @return 1 if rs1 <= rs2, 0 otherwise.
 */
uint32_t __attribute__ ((noinline)) riscv_emulate_fles(float rs1, float rs2) {

  const float opa = subnormal_flush(rs1);
  const float opb = subnormal_flush(rs2);

  // comparisons involving NaN are never true
  if ((fpclassify(opa) == FP_NAN) || (fpclassify(opb) == FP_NAN)) {
    return 0;
  }

  if (islessequal(opa, opb)) {
    return 1;
  }

  return 0;
}


/**********************************************************************/


/**
 * Software emulation of sign injection: the result is rs1 carrying the sign
 * of rs2. Inputs are NOT flushed here.
 *
 * @param rs1 Operand providing the value.
 * @param rs2 Operand providing the sign.
 * @return rs1, negated when its sign differs from the sign of rs2.
 */
float __attribute__ ((noinline)) riscv_emulate_fsgnjs(float rs1, float rs2) {

  const int value_negative = ((int)signbit(rs1) != 0);
  const int sign_negative  = ((int)signbit(rs2) != 0);

  // signs already agree -> keep rs1; otherwise flip it
  return (value_negative != sign_negative) ? -rs1 : rs1;
}


/**********************************************************************/


/**
 * Software emulation of negated sign injection: the result is rs1 carrying
 * the opposite of the sign of rs2. Inputs are NOT flushed here.
 *
 * @param rs1 Operand providing the value.
 * @param rs2 Operand providing the (inverted) sign.
 * @return rs1, negated when its sign equals the sign of rs2.
 */
float __attribute__ ((noinline)) riscv_emulate_fsgnjns(float rs1, float rs2) {

  const int value_negative = ((int)signbit(rs1) != 0);
  const int sign_negative  = ((int)signbit(rs2) != 0);

  // result must have the opposite sign of rs2 -> flip when the signs agree
  return (value_negative == sign_negative) ? -rs1 : rs1;
}


/**********************************************************************/


/**
 * Software emulation of XOR sign injection: the result is rs1 whose sign is
 * the XOR of the signs of rs1 and rs2. Inputs are NOT flushed here.
 *
 * @param rs1 Operand providing the value (and one sign).
 * @param rs2 Operand providing the other sign.
 * @return rs1, negated exactly when rs2 has its sign bit set.
 */
float __attribute__ ((noinline)) riscv_emulate_fsgnjxs(float rs1, float rs2) {

  const int sign_negative = ((int)signbit(rs2) != 0);

  // XOR with a cleared sign keeps rs1; XOR with a set sign flips it
  return sign_negative ? -rs1 : rs1;
}


/**********************************************************************/


/**
 * Software emulation of floating-point classification.
 *
 * Returns a one-hot mask in the RISC-V classify layout (bit 0 = -inf up to
 * bit 9 = quiet NaN). The input is NOT flushed, so subnormals are reported
 * as such. A NaN is reported as quiet when bit 22 of its raw encoding (the
 * mantissa's MSB) is set, and as signaling otherwise.
 *
 * @param rs1 Value to classify.
 * @return Mask with the matching class bit set; 0 if fpclassify() reports
 *         none of the handled categories.
 */
uint32_t __attribute__ ((noinline)) riscv_emulate_fclasss(float rs1) {

  // RISC-V classify result layout
  enum {
    CLASS_NEG_INF    = 1 << 0, // negative infinity
    CLASS_NEG_NORM   = 1 << 1, // negative normal number
    CLASS_NEG_DENORM = 1 << 2, // negative subnormal number
    CLASS_NEG_ZERO   = 1 << 3, // negative zero
    CLASS_POS_ZERO   = 1 << 4, // positive zero
    CLASS_POS_DENORM = 1 << 5, // positive subnormal number
    CLASS_POS_NORM   = 1 << 6, // positive normal number
    CLASS_POS_INF    = 1 << 7, // positive infinity
    CLASS_SNAN       = 1 << 8, // signaling NaN (sNaN)
    CLASS_QNAN       = 1 << 9  // quiet NaN (qNaN)
  };

  const int negative = ((int)signbit(rs1) != 0);

  switch (fpclassify(rs1)) {

    case FP_INFINITE:
      return negative ? CLASS_NEG_INF : CLASS_POS_INF;

    case FP_ZERO:
      return negative ? CLASS_NEG_ZERO : CLASS_POS_ZERO;

    case FP_NORMAL:
      return negative ? CLASS_NEG_NORM : CLASS_POS_NORM;

    case FP_SUBNORMAL:
      return negative ? CLASS_NEG_DENORM : CLASS_POS_DENORM;

    case FP_NAN: {
      // inspect the raw encoding: bit 22 (mantissa's MSB) set -> quiet NaN
      union {
        uint32_t binary_value;
        float    float_value;
      } raw;
      raw.float_value = rs1;
      return ((raw.binary_value >> 22) & 1u) ? CLASS_QNAN : CLASS_SNAN;
    }

    default:
      return 0;
  }
}


/**********************************************************************/


/**
 * Software emulation of single-precision division.
 *
 * Subnormal inputs and a subnormal result are flushed to zero via
 * subnormal_flush(). No NaN replacement is performed here.
 *
 * @param rs1 Dividend.
 * @param rs2 Divisor.
 * @return rs1 / rs2 after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fdivs(float rs1, float rs2) {

  const float dividend = subnormal_flush(rs1);
  const float divisor  = subnormal_flush(rs2);

  return subnormal_flush(dividend / divisor);
}


/**********************************************************************/


/**
 * Software emulation of the single-precision square root.
 *
 * A subnormal input and a subnormal result are flushed to zero via
 * subnormal_flush(); the root itself is computed with sqrtf().
 *
 * @param rs1 Radicand.
 * @return sqrtf(rs1) after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fsqrts(float rs1) {

  const float radicand = subnormal_flush(rs1);

  return subnormal_flush(sqrtf(radicand));
}


/**********************************************************************/


/**
 * Software emulation of multiply-add: (rs1 * rs2) + rs3.
 *
 * All inputs and the result are flushed via subnormal_flush(). The product
 * and the sum are written as one C expression, as in the original code.
 *
 * @param rs1 First factor.
 * @param rs2 Second factor.
 * @param rs3 Addend.
 * @return (rs1 * rs2) + rs3 after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fmadds(float rs1, float rs2, float rs3) {

  const float factor_a = subnormal_flush(rs1);
  const float factor_b = subnormal_flush(rs2);
  const float addend   = subnormal_flush(rs3);

  return subnormal_flush((factor_a * factor_b) + addend);
}


/**********************************************************************/


/**
 * Software emulation of multiply-subtract: (rs1 * rs2) - rs3.
 *
 * All inputs and the result are flushed via subnormal_flush(). The product
 * and the difference are written as one C expression, as in the original.
 *
 * @param rs1 First factor.
 * @param rs2 Second factor.
 * @param rs3 Subtrahend.
 * @return (rs1 * rs2) - rs3 after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fmsubs(float rs1, float rs2, float rs3) {

  const float factor_a   = subnormal_flush(rs1);
  const float factor_b   = subnormal_flush(rs2);
  const float subtrahend = subnormal_flush(rs3);

  return subnormal_flush((factor_a * factor_b) - subtrahend);
}


/**********************************************************************/


/**
 * Software emulation of negated multiply-subtract: -(rs1 * rs2) + rs3.
 *
 * All inputs and the result are flushed via subnormal_flush(). The whole
 * computation is written as one C expression, as in the original code.
 *
 * @param rs1 First factor.
 * @param rs2 Second factor.
 * @param rs3 Addend.
 * @return -(rs1 * rs2) + rs3 after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fnmsubs(float rs1, float rs2, float rs3) {

  const float factor_a = subnormal_flush(rs1);
  const float factor_b = subnormal_flush(rs2);
  const float addend   = subnormal_flush(rs3);

  return subnormal_flush(-(factor_a * factor_b) + addend);
}


/**********************************************************************/


/**
 * Software emulation of negated multiply-add: -(rs1 * rs2) - rs3.
 *
 * All inputs and the result are flushed via subnormal_flush(). The whole
 * computation is written as one C expression, as in the original code.
 *
 * @param rs1 First factor.
 * @param rs2 Second factor.
 * @param rs3 Subtrahend.
 * @return -(rs1 * rs2) - rs3 after flushing.
 */
float __attribute__ ((noinline)) riscv_emulate_fnmadds(float rs1, float rs2, float rs3) {

  const float factor_a   = subnormal_flush(rs1);
  const float factor_b   = subnormal_flush(rs2);
  const float subtrahend = subnormal_flush(rs3);

  return subnormal_flush(-(factor_a * factor_b) - subtrahend);
}


#endif // neorv32_zfinx_extension_intrinsics_h


// Doxygen cross-reference residue from the HTML source listing:
//   float_conv_t               - Definition neorv32_zfinx_extension_intrinsics.h:76
//   float_conv_t::binary_value - uint32_t binary_value - Definition neorv32_zfinx_extension_intrinsics.h:77
//   float_conv_t::float_value  - float float_value     - Definition neorv32_zfinx_extension_intrinsics.h:78