Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
Add division functions
Browse files Browse the repository at this point in the history
Change-Id: Ic79f2cc089d55a68800c74ea7232bee8be625385
  • Loading branch information
b-sumner committed Apr 24, 2019
1 parent fc7455f commit 6bfe121
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 0 deletions.
3 changes: 3 additions & 0 deletions ocml/inc/ocml.h
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,9 @@ extern __attribute__((const)) double2 OCML_MANGLE_F64(ctanh)(double2);
extern __attribute__((const)) float2 OCML_MANGLE_F32(csqrt)(float2);
extern __attribute__((const)) double2 OCML_MANGLE_F64(csqrt)(double2);

// Complex division: the first argument is the numerator and the second the
// denominator; each vector holds (real, imaginary) in (.x, .y).
extern __attribute__((const)) float2 OCML_MANGLE_F32(cdiv)(float2, float2);
extern __attribute__((const)) double2 OCML_MANGLE_F64(cdiv)(double2, double2);

#pragma OPENCL EXTENSION cl_khr_fp16 : disable

#endif // OCML_H
68 changes: 68 additions & 0 deletions ocml/src/cdivD.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*===--------------------------------------------------------------------------
* ROCm Device Libraries
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*===------------------------------------------------------------------------*/

#include "mathD.h"

/*
 * CP(A,B,C,D) evaluates the two-term product sum A*C + B*D in double
 * precision.
 *
 * B*D is rounded once into cp_prod; an FMA then recovers the part of B*D
 * that the rounding discarded (cp_err).  A second FMA forms A*C + cp_prod
 * and the recovered error is added back at the end.
 *
 * Each argument is evaluated exactly once, in the order A, B, C, D.
 */
#define CP(A,B,C,D) ({ \
    double cp_a = (A); \
    double cp_b = (B); \
    double cp_c = (C); \
    double cp_d = (D); \
    double cp_prod = cp_b * cp_d; \
    double cp_err = BUILTIN_FMA_F64(cp_b, cp_d, -cp_prod); \
    double cp_sum = BUILTIN_FMA_F64(cp_a, cp_c, cp_prod); \
    cp_sum + cp_err; \
})


// Double-precision complex division zn / zd, with .x used as the real part and
// .y as the imaginary part of each operand.
//
// The quotient is formed as zn * conj(zd) / |zd|^2. The denominator is first
// rescaled by a power of two taken from its larger-magnitude component, and
// that power of two is folded back into the result exponent at the end.
//
// Unless FINITE_ONLY_OPT() holds, a result whose two components are both NaN
// is re-derived for three special cases: zero denominator, infinite numerator
// over finite denominator, and finite numerator over infinite denominator.
// NOTE(review): these cases resemble the recovery step of the C99 Annex G
// complex-division example -- confirm that this is the intended reference.
CONSTATTR double2
MATH_MANGLE(cdiv)(double2 zn, double2 zd)
{
double zdx = zd.x;
double zdy = zd.y;
// g: true when the real part of the denominator has the larger magnitude.
bool g = BUILTIN_ABS_F64(zdx) > BUILTIN_ABS_F64(zdy);
// de: exponent of the larger-magnitude component; both components are then
// scaled by 2^-de (presumably FLDEXP has ldexp semantics -- confirm in mathD.h).
int de = BUILTIN_FREXP_EXP_F64(g ? zdx : zdy);
zdx = BUILTIN_FLDEXP_F64(zdx, -de);
zdy = BUILTIN_FLDEXP_F64(zdy, -de);
// u is the larger-magnitude scaled component, v the other one.
double u = g ? zdx : zdy;
double v = g ? zdy : zdx;
// d2 = u^2 + v^2, i.e. |zd|^2 scaled by 2^(-2*de).
double d2 = BUILTIN_FMA_F64(u, u, v*v);
// Real and imaginary parts of zn * conj(zd), using the scaled denominator:
//   tr = zn.x*zdx + zn.y*zdy
//   ti = zn.y*zdx - zn.x*zdy
double tr = CP(zn.x, zn.y, zdx, zdy);
double ti = CP(zn.y, -zn.x, zdx, zdy);
// Split each numerator into mantissa and exponent so that the division is
// done on the mantissa only and the exponent is re-applied afterwards.
double nr = BUILTIN_FREXP_MANT_F64(tr);
double ni = BUILTIN_FREXP_MANT_F64(ti);
int er = BUILTIN_FREXP_EXP_F64(tr);
int ei = BUILTIN_FREXP_EXP_F64(ti);

// tr and ti carry a factor 2^-de and d2 carries 2^(-2*de), so the quotient
// needs one more factor of 2^-de: hence the exponent (er - de) / (ei - de).
double rr = BUILTIN_FLDEXP_F64(MATH_DIV(nr, d2), er - de);
double ri = BUILTIN_FLDEXP_F64(MATH_DIV(ni, d2), ei - de);

if (!FINITE_ONLY_OPT()) {
// Only a result with NaN in both components is reconsidered.
if (BUILTIN_ISNAN_F64(rr) && BUILTIN_ISNAN_F64(ri)) {
if (d2 == 0.0 && (!BUILTIN_ISNAN_F64(zn.x) || !BUILTIN_ISNAN_F64(zn.y))) {
// Zero denominator and a numerator that is not entirely NaN: multiply
// each numerator component by an infinity carrying the sign of zd.x.
double i = BUILTIN_COPYSIGN_F64(AS_DOUBLE(PINFBITPATT_DP64), zd.x);
rr = i * zn.x;
ri = i * zn.y;
} else if ((BUILTIN_ISINF_F64(zn.x) || BUILTIN_ISINF_F64(zn.y)) &&
(BUILTIN_ISFINITE_F64(zd.x) && BUILTIN_ISFINITE_F64(zd.y))) {
// Infinite numerator, finite denominator: replace infinite numerator
// components by +/-1 and the others by +/-0 (signs kept), then scale
// zn * conj(zd) computed from those values by +infinity.
double znx = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zn.x) ? 1.0 : 0.0, zn.x);
double zny = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zn.y) ? 1.0 : 0.0, zn.y);
rr = AS_DOUBLE(PINFBITPATT_DP64) * MATH_MAD(znx, zd.x, zny * zd.y);
ri = AS_DOUBLE(PINFBITPATT_DP64) * MATH_MAD(zny, zd.x, -znx * zd.y);
} else if ((BUILTIN_ISINF_F64(zd.x) || BUILTIN_ISINF_F64(zd.y)) &&
(BUILTIN_ISFINITE_F64(zn.x) && BUILTIN_ISFINITE_F64(zn.y))) {
// Finite numerator, infinite denominator: replace infinite denominator
// components by +/-1 and the others by +/-0 (signs kept), then multiply
// zn * conj(zd) computed from those values by 0.0.
// zdx/zdy are overwritten here; their scaled values are no longer needed.
zdx = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zd.x) ? 1.0 : 0.0, zd.x);
zdy = BUILTIN_COPYSIGN_F64(BUILTIN_ISINF_F64(zd.y) ? 1.0 : 0.0, zd.y);
rr = 0.0 * MATH_MAD(zn.x, zdx, zn.y * zdy);
ri = 0.0 * MATH_MAD(zn.y, zdx, -zn.x * zdy);
}
}
}

return (double2)(rr, ri);
}

67 changes: 67 additions & 0 deletions ocml/src/cdivF.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*===--------------------------------------------------------------------------
* ROCm Device Libraries
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*===------------------------------------------------------------------------*/

#include "mathF.h"

/*
 * CP(A,B,C,D) evaluates the two-term product sum A*C + B*D in single
 * precision.
 *
 * B*D is rounded once into cp_prod; an FMA then recovers the part of B*D
 * that the rounding discarded (cp_err).  A second FMA forms A*C + cp_prod
 * and the recovered error is added back at the end.
 *
 * Each argument is evaluated exactly once, in the order A, B, C, D.
 */
#define CP(A,B,C,D) ({ \
    float cp_a = (A); \
    float cp_b = (B); \
    float cp_c = (C); \
    float cp_d = (D); \
    float cp_prod = cp_b * cp_d; \
    float cp_err = BUILTIN_FMA_F32(cp_b, cp_d, -cp_prod); \
    float cp_sum = BUILTIN_FMA_F32(cp_a, cp_c, cp_prod); \
    cp_sum + cp_err; \
})


// Single-precision complex division zn / zd, with .x used as the real part and
// .y as the imaginary part of each operand.
//
// The quotient is formed as zn * conj(zd) / |zd|^2. The denominator is first
// rescaled by a power of two taken from its larger-magnitude component, and
// that power of two is folded back into the result exponent at the end.
//
// Unless FINITE_ONLY_OPT() holds, a result whose two components are both NaN
// is re-derived for three special cases: zero denominator, infinite numerator
// over finite denominator, and finite numerator over infinite denominator.
// NOTE(review): these cases resemble the recovery step of the C99 Annex G
// complex-division example -- confirm that this is the intended reference.
CONSTATTR float2
MATH_MANGLE(cdiv)(float2 zn, float2 zd)
{
float zdx = zd.x;
float zdy = zd.y;
// g: true when the real part of the denominator has the larger magnitude.
bool g = BUILTIN_ABS_F32(zdx) > BUILTIN_ABS_F32(zdy);
// de: exponent of the larger-magnitude component; both components are then
// scaled by 2^-de (presumably FLDEXP has ldexp semantics -- confirm in mathF.h).
int de = BUILTIN_FREXP_EXP_F32(g ? zdx : zdy);
zdx = BUILTIN_FLDEXP_F32(zdx, -de);
zdy = BUILTIN_FLDEXP_F32(zdy, -de);
// u is the larger-magnitude scaled component, v the other one.
float u = g ? zdx : zdy;
float v = g ? zdy : zdx;
// d2 = u^2 + v^2, i.e. |zd|^2 scaled by 2^(-2*de).
float d2 = BUILTIN_FMA_F32(u, u, v*v);
// Real and imaginary parts of zn * conj(zd), using the scaled denominator:
//   tr = zn.x*zdx + zn.y*zdy
//   ti = zn.y*zdx - zn.x*zdy
float tr = CP(zn.x, zn.y, zdx, zdy);
float ti = CP(zn.y, -zn.x, zdx, zdy);
// Split each numerator into mantissa and exponent so that the division is
// done on the mantissa only and the exponent is re-applied afterwards.
float nr = BUILTIN_FREXP_MANT_F32(tr);
float ni = BUILTIN_FREXP_MANT_F32(ti);
int er = BUILTIN_FREXP_EXP_F32(tr);
int ei = BUILTIN_FREXP_EXP_F32(ti);
// tr and ti carry a factor 2^-de and d2 carries 2^(-2*de), so the quotient
// needs one more factor of 2^-de: hence the exponent (er - de) / (ei - de).
// The division goes through MATH_FAST_DIV here.
float rr = BUILTIN_FLDEXP_F32(MATH_FAST_DIV(nr, d2), er - de);
float ri = BUILTIN_FLDEXP_F32(MATH_FAST_DIV(ni, d2), ei - de);

if (!FINITE_ONLY_OPT()) {
// Only a result with NaN in both components is reconsidered.
if (BUILTIN_ISNAN_F32(rr) && BUILTIN_ISNAN_F32(ri)) {
if (d2 == 0.0f && (!BUILTIN_ISNAN_F32(zn.x) || !BUILTIN_ISNAN_F32(zn.y))) {
// Zero denominator and a numerator that is not entirely NaN: multiply
// each numerator component by an infinity carrying the sign of zd.x.
float i = BUILTIN_COPYSIGN_F32(AS_FLOAT(PINFBITPATT_SP32), zd.x);
rr = i * zn.x;
ri = i * zn.y;
} else if ((BUILTIN_ISINF_F32(zn.x) || BUILTIN_ISINF_F32(zn.y)) &&
(BUILTIN_ISFINITE_F32(zd.x) && BUILTIN_ISFINITE_F32(zd.y))) {
// Infinite numerator, finite denominator: replace infinite numerator
// components by +/-1 and the others by +/-0 (signs kept), then scale
// zn * conj(zd) computed from those values by +infinity.
float znx = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zn.x) ? 1.0f : 0.0f, zn.x);
float zny = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zn.y) ? 1.0f : 0.0f, zn.y);
rr = AS_FLOAT(PINFBITPATT_SP32) * MATH_MAD(znx, zd.x, zny * zd.y);
ri = AS_FLOAT(PINFBITPATT_SP32) * MATH_MAD(zny, zd.x, -znx * zd.y);
} else if ((BUILTIN_ISINF_F32(zd.x) || BUILTIN_ISINF_F32(zd.y)) &&
(BUILTIN_ISFINITE_F32(zn.x) && BUILTIN_ISFINITE_F32(zn.y))) {
// Finite numerator, infinite denominator: replace infinite denominator
// components by +/-1 and the others by +/-0 (signs kept), then multiply
// zn * conj(zd) computed from those values by 0.0f.
// zdx/zdy are overwritten here; their scaled values are no longer needed.
zdx = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zd.x) ? 1.0f : 0.0f, zd.x);
zdy = BUILTIN_COPYSIGN_F32(BUILTIN_ISINF_F32(zd.y) ? 1.0f : 0.0f, zd.y);
rr = 0.0f * MATH_MAD(zn.x, zdx, zn.y * zdy);
ri = 0.0f * MATH_MAD(zn.y, zdx, -zn.x * zdy);
}
}
}

return (float2)(rr, ri);
}

0 comments on commit 6bfe121

Please sign in to comment.