diff --git a/ocml/inc/ocml.h b/ocml/inc/ocml.h
index 8a94c5c0..84debb12 100644
--- a/ocml/inc/ocml.h
+++ b/ocml/inc/ocml.h
@@ -670,6 +670,24 @@ DECL_CONST_OCML_UNARY_F16(native_log2)
 extern __attribute__((const)) float OCML_MANGLE_F32(cabs)(float2);
 extern __attribute__((const)) double OCML_MANGLE_F64(cabs)(double2);
 
+extern __attribute__((const)) float2 OCML_MANGLE_F32(cacos)(float2); /* complex arc cosine */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(cacos)(double2);
+
+extern __attribute__((const)) float2 OCML_MANGLE_F32(cacosh)(float2); /* complex inverse hyperbolic cosine */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(cacosh)(double2);
+
+extern __attribute__((const)) float2 OCML_MANGLE_F32(casin)(float2); /* complex arc sine */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(casin)(double2);
+
+extern __attribute__((const)) float2 OCML_MANGLE_F32(casinh)(float2); /* complex inverse hyperbolic sine */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(casinh)(double2);
+
+extern __attribute__((const)) float2 OCML_MANGLE_F32(catan)(float2); /* complex arc tangent */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(catan)(double2);
+
+extern __attribute__((const)) float2 OCML_MANGLE_F32(catanh)(float2); /* complex inverse hyperbolic tangent */
+extern __attribute__((const)) double2 OCML_MANGLE_F64(catanh)(double2);
+
 extern __attribute__((const)) float2 OCML_MANGLE_F32(cexp)(float2);
 extern __attribute__((const)) double2 OCML_MANGLE_F64(cexp)(double2);
 
diff --git a/ocml/src/acoshD.cl b/ocml/src/acoshD.cl
index e2424edb..3cacbf9f 100644
--- a/ocml/src/acoshD.cl
+++ b/ocml/src/acoshD.cl
@@ -21,8 +21,6 @@ MATH_MANGLE(acosh)(double x)
     double2 a = add(sx, root2(sub(sqr(sx), s*s)));
     double z = MATH_PRIVATE(lnep)(a, b ? 512 : 0);
 
-    z = x == 1.0 ? 0.0 : z;
-
     if (!FINITE_ONLY_OPT()) {
         z = BUILTIN_CLASS_F64(x, CLASS_PINF) ? x : z;
         z = x < 1.0 ? AS_DOUBLE(QNANBITPATT_DP64) : z;
diff --git a/ocml/src/acoshF.cl b/ocml/src/acoshF.cl
index 5e5cc1f1..699330ca 100644
--- a/ocml/src/acoshF.cl
+++ b/ocml/src/acoshF.cl
@@ -21,8 +21,6 @@ MATH_MANGLE(acosh)(float x)
     float2 a = add(sx, root2(sub(sqr(sx), s*s)));
     float z = MATH_PRIVATE(lnep)(a, b ? 64 : 0);
 
-    z = x == 1.0f ? 0.0f : z;
-
     if (!FINITE_ONLY_OPT()) {
         z = BUILTIN_CLASS_F32(x, CLASS_PINF) ? x : z;
         z = x < 1.0f ? AS_FLOAT(QNANBITPATT_SP32) : z;
diff --git a/ocml/src/cacosD.cl b/ocml/src/cacosD.cl
new file mode 100644
index 00000000..ac468011
--- /dev/null
+++ b/ocml/src/cacosD.cl
@@ -0,0 +1,17 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+CONSTATTR double2
+MATH_MANGLE(cacos)(double2 z)
+{   // Built from cacosh(z): its two components are swapped, and the sign bit of z.y picks which one is negated.
+    const double2 ch = MATH_MANGLE(cacosh)(z);
+    const bool neg = AS_INT2(z.y).hi < 0; // raw sign-bit test, so -0.0 counts as negative
+    return neg ? (double2)(-ch.y, ch.x) : (double2)(ch.y, -ch.x);
+}
+
diff --git a/ocml/src/cacosF.cl b/ocml/src/cacosF.cl
new file mode 100644
index 00000000..e20b7d90
--- /dev/null
+++ b/ocml/src/cacosF.cl
@@ -0,0 +1,17 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+CONSTATTR float2
+MATH_MANGLE(cacos)(float2 z)
+{   // Built from cacosh(z): its two components are swapped, and the sign bit of z.y picks which one is negated.
+    const float2 ch = MATH_MANGLE(cacosh)(z);
+    const bool neg = AS_INT(z.y) < 0; // raw sign-bit test, so -0.0f counts as negative
+    return neg ? (float2)(-ch.y, ch.x) : (float2)(ch.y, -ch.x);
+}
+
diff --git a/ocml/src/cacoshD.cl b/ocml/src/cacoshD.cl
new file mode 100644
index 00000000..cbb10cd6
--- /dev/null
+++ b/ocml/src/cacoshD.cl
@@ -0,0 +1,64 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+#define DOUBLE_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR double4 MATH_PRIVATE(epcsqrtep)(double4 z);
+extern CONSTATTR double MATH_PRIVATE(lnep)(double2 a, int ea);
+
+CONSTATTR double2
+MATH_MANGLE(cacosh)(double2 z)
+{   // Complex acosh of z; returns (rr, ri). NOTE(review): .x/.y as real/imaginary parts is inferred from the rr/ri naming.
+    double x = BUILTIN_ABS_F64(z.x);
+    double y = BUILTIN_ABS_F64(z.y);
+    // l2: hi/lo pair fed to lnep (or carrying rr directly in .hi); t: operands for the final atan2(t.y, t.x)
+    double2 l2, t;
+    int e = 0;       // exponent argument handed to lnep
+    bool b = true;   // true: rr = 0.5*lnep(l2, e); false: rr = l2.hi
+
+    if (x < 0x1.0p+54 && y < 0x1.0p+54) {
+        if (x >= 1.0 || y >= 0x1.0p-53 || y > (1.0 - x)*0x1.0p-26) {
+            double4 z2p1 = (double4)(add(mul(add(y,x), sub(y,x)), 1.0), mul(y,x)*2.0); // (y*y - x*x + 1, 2*x*y) as hi/lo pairs
+            double4 rz2m1 = MATH_PRIVATE(epcsqrtep)(z2p1); // presumably an extended-precision complex square root
+            rz2m1 = (double4)(csgn(rz2m1.hi, (double2)z.x), csgn(rz2m1.lo, (double2)z.y)); // swap halves, give them the signs of z.x / z.y
+            double4 s = (double4)(add(rz2m1.lo, z.x), add(rz2m1.hi, z.y)); // add z component-wise
+            l2 = add(sqr(s.lo), sqr(s.hi)); // sum of the squares of both halves of s
+            t = (double2)(s.s1, z.y == 0.0 ? z.y : s.s3); // high words of s; a zero z.y is passed through with its sign
+        } else { // x < 1, y < 2^-53 and y <= (1 - x)*2^-26: no logarithm is evaluated
+            b = false;
+            double r = MATH_FAST_SQRT(BUILTIN_FMA_F64(-x, x, 1.0)); // sqrt(1 - x*x); NOTE(review): casinhD.cl uses MATH_SQRT for its counterpart - confirm
+            l2 = con(MATH_DIV(y, r), 0.0); // .hi = y / r, which becomes rr
+            t = (double2)(z.x, BUILTIN_COPYSIGN_F64(r, z.y));
+        }
+    } else { // a magnitude >= 2^54, infinite, or NaN (the < tests fail)
+        e = BUILTIN_FREXP_EXP_F64(BUILTIN_MAX_F64(x,y));
+        x = BUILTIN_FLDEXP_F64(x, -e); // scale both magnitudes by 2^-e before squaring
+        y = BUILTIN_FLDEXP_F64(y, -e);
+        l2 = add(sqr(x), sqr(y));
+        e = 2*e + 2; // NOTE(review): presumably 2*e undoes the scaling of the squared sum and +2 supplies a factor of 4 - confirm against lnep
+        t = z;
+    }
+
+    double rr;
+    if (b) {
+        rr = 0.5 * MATH_PRIVATE(lnep)(l2, e);
+    } else {
+        rr = l2.hi;
+    }
+
+    double ri = MATH_MANGLE(atan2)(t.y, t.x);
+    // With !FINITE_ONLY_OPT(), any infinite component of z forces rr to the PINFBITPATT_DP64 value.
+    if (!FINITE_ONLY_OPT()) {
+        rr = (BUILTIN_ISINF_F64(z.x) | BUILTIN_ISINF_F64(z.y)) ? AS_DOUBLE(PINFBITPATT_DP64) : rr;
+    }
+
+    return (double2)(rr, ri);
+}
+
diff --git a/ocml/src/cacoshF.cl b/ocml/src/cacoshF.cl
new file mode 100644
index 00000000..14c151b9
--- /dev/null
+++ b/ocml/src/cacoshF.cl
@@ -0,0 +1,64 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+#define FLOAT_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR float4 MATH_PRIVATE(epcsqrtep)(float4 z);
+extern CONSTATTR float MATH_PRIVATE(lnep)(float2 a, int ea);
+
+CONSTATTR float2
+MATH_MANGLE(cacosh)(float2 z)
+{   // Complex acosh of z; returns (rr, ri). NOTE(review): .x/.y as real/imaginary parts is inferred from the rr/ri naming.
+    float x = BUILTIN_ABS_F32(z.x);
+    float y = BUILTIN_ABS_F32(z.y);
+    // l2: hi/lo pair fed to lnep (or carrying rr directly in .hi); t: operands for the final atan2(t.y, t.x)
+    float2 l2, t;
+    int e = 0;       // exponent argument handed to lnep
+    bool b = true;   // true: rr = 0.5f*lnep(l2, e); false: rr = l2.hi
+
+    if (x < 0x1.0p+25f && y < 0x1.0p+25f) {
+        if (x >= 1.0f || y >= 0x1.0p-24f || y > (1.0f - x)*0x1.0p-12f) {
+            float4 z2p1 = (float4)(add(mul(add(y,x), sub(y,x)), 1.0f), mul(y,x)*2.0f); // (y*y - x*x + 1, 2*x*y) as hi/lo pairs
+            float4 rz2m1 = MATH_PRIVATE(epcsqrtep)(z2p1); // presumably an extended-precision complex square root
+            rz2m1 = (float4)(csgn(rz2m1.hi, (float2)z.x), csgn(rz2m1.lo, (float2)z.y)); // swap halves, give them the signs of z.x / z.y
+            float4 s = (float4)(add(rz2m1.lo, z.x), add(rz2m1.hi, z.y)); // add z component-wise
+            l2 = add(sqr(s.lo), sqr(s.hi)); // sum of the squares of both halves of s
+            t = (float2)(s.s1, z.y == 0.0f ? z.y : s.s3); // high words of s; a zero z.y is passed through with its sign
+        } else { // x < 1, y < 2^-24 and y <= (1 - x)*2^-12: no logarithm is evaluated
+            b = false;
+            float r = MATH_SQRT(BUILTIN_FMA_F32(-x, x, 1.0f)); // sqrt(1 - x*x)
+            l2 = con(MATH_DIV(y, r), 0.0f); // .hi = y / r, which becomes rr
+            t = (float2)(z.x, BUILTIN_COPYSIGN_F32(r, z.y));
+        }
+    } else { // a magnitude >= 2^25, infinite, or NaN (the < tests fail)
+        e = BUILTIN_FREXP_EXP_F32(AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(x), AS_UINT(y)))); // x, y are sign-free here, so the max is taken on their raw bit patterns
+        x = BUILTIN_FLDEXP_F32(x, -e); // scale both magnitudes by 2^-e before squaring
+        y = BUILTIN_FLDEXP_F32(y, -e);
+        l2 = add(sqr(x), sqr(y));
+        e = 2*e + 2; // NOTE(review): presumably 2*e undoes the scaling of the squared sum and +2 supplies a factor of 4 - confirm against lnep
+        t = z;
+    }
+
+    float rr;
+    if (b) {
+        rr = 0.5f * MATH_PRIVATE(lnep)(l2, e);
+    } else {
+        rr = l2.hi;
+    }
+
+    float ri = MATH_MANGLE(atan2)(t.y, t.x);
+    // With !FINITE_ONLY_OPT(), any infinite component of z forces rr to the PINFBITPATT_SP32 value.
+    if (!FINITE_ONLY_OPT()) {
+        rr = (BUILTIN_ISINF_F32(z.x) | BUILTIN_ISINF_F32(z.y)) ? AS_FLOAT(PINFBITPATT_SP32) : rr;
+    }
+
+    return (float2)(rr, ri);
+}
+
diff --git a/ocml/src/casinD.cl b/ocml/src/casinD.cl
new file mode 100644
index 00000000..d0bafe12
--- /dev/null
+++ b/ocml/src/casinD.cl
@@ -0,0 +1,16 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+CONSTATTR double2
+MATH_MANGLE(casin)(double2 z)
+{   // Evaluates casinh at (-z.y, z.x), i.e. at i*z, and returns (w.y, -w.x), i.e. -i*w.
+    const double2 w = MATH_MANGLE(casinh)((double2)(-z.y, z.x));
+    return (double2)(w.y, -w.x);
+}
+
diff --git a/ocml/src/casinF.cl b/ocml/src/casinF.cl
new file mode 100644
index 00000000..1189c599
--- /dev/null
+++ b/ocml/src/casinF.cl
@@ -0,0 +1,16 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+CONSTATTR float2
+MATH_MANGLE(casin)(float2 z)
+{   // Evaluates casinh at (-z.y, z.x), i.e. at i*z, and returns (w.y, -w.x), i.e. -i*w.
+    const float2 w = MATH_MANGLE(casinh)((float2)(-z.y, z.x));
+    return (float2)(w.y, -w.x);
+}
+
diff --git a/ocml/src/casinhD.cl b/ocml/src/casinhD.cl
new file mode 100644
index 00000000..6d6b096d
--- /dev/null
+++ b/ocml/src/casinhD.cl
@@ -0,0 +1,65 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+#define DOUBLE_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR double4 MATH_PRIVATE(epcsqrtep)(double4 z);
+extern CONSTATTR double MATH_PRIVATE(lnep)(double2 a, int ea);
+
+CONSTATTR double2
+MATH_MANGLE(casinh)(double2 z)
+{   // Complex asinh of z; returns (rr, ri), computed on |z.x|, |z.y| and re-signed from z at the end.
+    double x = BUILTIN_ABS_F64(z.x);
+    double y = BUILTIN_ABS_F64(z.y);
+    // l2: hi/lo pair fed to lnep (or carrying rr directly in .hi); t: operands for the final atan2(t.y, t.x)
+    double2 l2, t;
+    int e = 0;       // exponent argument handed to lnep
+    bool b = true;   // true: rr = 0.5*lnep(l2, e); false: rr = l2.hi
+
+    if (x < 0x1.0p+54 && y < 0x1.0p+54) {
+        if (y >= 1.0 || x >= 0x1.0p-53 || x > (1.0 - y)*0x1.0p-26) { // double literal, matching the mirrored test in cacoshD.cl
+            double4 z2p1 = (double4)(add(mul(add(x,y), sub(x,y)), 1.0), mul(y,x)*2.0); // (x*x - y*y + 1, 2*x*y) as hi/lo pairs
+            double4 rz2p1 = MATH_PRIVATE(epcsqrtep)(z2p1); // presumably an extended-precision complex square root
+            double4 s = (double4)(add(rz2p1.lo, x), add(rz2p1.hi, y)); // add (x, y) component-wise
+            l2 = add(sqr(s.lo), sqr(s.hi)); // sum of the squares of both halves of s
+            t = (double2)(s.s1, s.s3); // high words of the two halves of s
+        } else { // y < 1, x < 2^-53 and x <= (1 - y)*2^-26: no logarithm is evaluated
+            b = false;
+            double r = MATH_SQRT(BUILTIN_FMA_F64(-y, y, 1.0)); // sqrt(1 - y*y)
+            l2 = con(MATH_DIV(x, r), 0.0); // .hi = x / r, which becomes rr
+            t = (double2)(r, y);
+        }
+    } else { // a magnitude >= 2^54, infinite, or NaN (the < tests fail)
+        t = (double2)(x, y); // captured before x and y are rescaled below
+        e = BUILTIN_FREXP_EXP_F64(BUILTIN_MAX_F64(x, y));
+        x = BUILTIN_FLDEXP_F64(x, -e); // scale both magnitudes by 2^-e before squaring
+        y = BUILTIN_FLDEXP_F64(y, -e);
+        l2 = add(sqr(x), sqr(y));
+        e = 2*e + 2; // NOTE(review): presumably 2*e undoes the scaling of the squared sum and +2 supplies a factor of 4 - confirm against lnep
+    }
+
+    double rr;
+    if (b) {
+        rr = 0.5 * MATH_PRIVATE(lnep)(l2, e);
+    } else {
+        rr = l2.hi;
+    }
+    // Both results were formed from magnitudes; give them the signs of the matching input components.
+    rr = BUILTIN_COPYSIGN_F64(rr, z.x);
+    double ri = BUILTIN_COPYSIGN_F64(MATH_MANGLE(atan2)(t.y, t.x), z.y);
+    // With !FINITE_ONLY_OPT(), any infinite component of z forces rr to the PINFBITPATT_DP64 value carrying the sign of z.x.
+    if (!FINITE_ONLY_OPT()) {
+        double i = BUILTIN_COPYSIGN_F64(AS_DOUBLE(PINFBITPATT_DP64), z.x);
+        rr = (BUILTIN_ISINF_F64(z.x) | BUILTIN_ISINF_F64(z.y)) ? i : rr;
+    }
+
+    return (double2)(rr, ri);
+}
+
diff --git a/ocml/src/casinhF.cl b/ocml/src/casinhF.cl
new file mode 100644
index 00000000..64624329
--- /dev/null
+++ b/ocml/src/casinhF.cl
@@ -0,0 +1,65 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+#define FLOAT_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR float4 MATH_PRIVATE(epcsqrtep)(float4 z);
+extern CONSTATTR float MATH_PRIVATE(lnep)(float2 a, int ea);
+
+CONSTATTR float2
+MATH_MANGLE(casinh)(float2 z)
+{   // Complex asinh of z; returns (rr, ri), computed on |z.x|, |z.y| and re-signed from z at the end.
+    float x = BUILTIN_ABS_F32(z.x);
+    float y = BUILTIN_ABS_F32(z.y);
+    // l2: hi/lo pair fed to lnep (or carrying rr directly in .hi); t: operands for the final atan2(t.y, t.x)
+    float2 l2, t;
+    int e = 0;       // exponent argument handed to lnep
+    bool b = true;   // true: rr = 0.5f*lnep(l2, e); false: rr = l2.hi
+
+    if (x < 0x1.0p+25f && y < 0x1.0p+25f) {
+        if (y >= 1.0f || x >= 0x1.0p-24f || x > (1.0f - y)*0x1.0p-12f) {
+            float4 z2p1 = (float4)(add(mul(add(x,y), sub(x,y)), 1.0f), mul(y,x)*2.0f); // (x*x - y*y + 1, 2*x*y) as hi/lo pairs
+            float4 rz2p1 = MATH_PRIVATE(epcsqrtep)(z2p1); // presumably an extended-precision complex square root
+            float4 s = (float4)(add(rz2p1.lo, x), add(rz2p1.hi, y)); // add (x, y) component-wise
+            l2 = add(sqr(s.lo), sqr(s.hi)); // sum of the squares of both halves of s
+            t = (float2)(s.s1, s.s3); // high words of the two halves of s
+        } else { // y < 1, x < 2^-24 and x <= (1 - y)*2^-12: no logarithm is evaluated
+            b = false;
+            float r = MATH_SQRT(BUILTIN_FMA_F32(-y, y, 1.0f)); // sqrt(1 - y*y)
+            l2 = con(MATH_DIV(x, r), 0.0f); // .hi = x / r, which becomes rr
+            t = (float2)(r, y);
+        }
+    } else { // a magnitude >= 2^25, infinite, or NaN (the < tests fail)
+        t = (float2)(x, y); // captured before x and y are rescaled below
+        e = BUILTIN_FREXP_EXP_F32(AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(x), AS_UINT(y)))); // x, y are sign-free here, so the max is taken on their raw bit patterns
+        x = BUILTIN_FLDEXP_F32(x, -e); // scale both magnitudes by 2^-e before squaring
+        y = BUILTIN_FLDEXP_F32(y, -e);
+        l2 = add(sqr(x), sqr(y));
+        e = 2*e + 2; // NOTE(review): presumably 2*e undoes the scaling of the squared sum and +2 supplies a factor of 4 - confirm against lnep
+    }
+
+    float rr;
+    if (b) {
+        rr = 0.5f * MATH_PRIVATE(lnep)(l2, e);
+    } else {
+        rr = l2.hi;
+    }
+    // Both results were formed from magnitudes; give them the signs of the matching input components.
+    rr = BUILTIN_COPYSIGN_F32(rr, z.x);
+    float ri = BUILTIN_COPYSIGN_F32(MATH_MANGLE(atan2)(t.y, t.x), z.y);
+    // With !FINITE_ONLY_OPT(), any infinite component of z forces rr to the PINFBITPATT_SP32 value carrying the sign of z.x.
+    if (!FINITE_ONLY_OPT()) {
+        float i = BUILTIN_COPYSIGN_F32(AS_FLOAT(PINFBITPATT_SP32), z.x);
+        rr = (BUILTIN_ISINF_F32(z.x) | BUILTIN_ISINF_F32(z.y)) ? i : rr;
+    }
+
+    return (float2)(rr, ri);
+}
+
diff --git a/ocml/src/catanD.cl b/ocml/src/catanD.cl
new file mode 100644
index 00000000..0c3cf43c
--- /dev/null
+++ b/ocml/src/catanD.cl
@@ -0,0 +1,16 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+CONSTATTR double2
+MATH_MANGLE(catan)(double2 z)
+{   // Evaluates catanh at (-z.y, z.x), i.e. at i*z, and returns (w.y, -w.x), i.e. -i*w.
+    const double2 w = MATH_MANGLE(catanh)((double2)(-z.y, z.x));
+    return (double2)(w.y, -w.x);
+}
+
diff --git a/ocml/src/catanF.cl b/ocml/src/catanF.cl
new file mode 100644
index 00000000..55715a59
--- /dev/null
+++ b/ocml/src/catanF.cl
@@ -0,0 +1,16 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+CONSTATTR float2
+MATH_MANGLE(catan)(float2 z)
+{   // Evaluates catanh at (-z.y, z.x), i.e. at i*z, and returns (w.y, -w.x), i.e. -i*w.
+    const float2 w = MATH_MANGLE(catanh)((float2)(-z.y, z.x));
+    return (float2)(w.y, -w.x);
+}
+
diff --git a/ocml/src/catanhD.cl b/ocml/src/catanhD.cl
new file mode 100644
index 00000000..1539e12b
--- /dev/null
+++ b/ocml/src/catanhD.cl
@@ -0,0 +1,59 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+#define DOUBLE_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR double MATH_PRIVATE(lnep)(double2 a, int ea);
+
+CONSTATTR double2
+MATH_MANGLE(catanh)(double2 z)
+{   // Complex atanh of z; returns (rr, ri), computed on |z.x|, |z.y| and re-signed from z at the end.
+    double x = BUILTIN_ABS_F64(z.x);
+    double y = BUILTIN_ABS_F64(z.y);
+    double rr, ri;
+
+    if (x < 0x1.0p+54 && y < 0x1.0p+54) {
+        double2 omx = sub(1.0, x);
+        double2 opx = add(1.0, x);
+        double2 y2 = sqr(y);
+        double2 b = sub(mul(omx, opx), y2); // (1 - x)*(1 + x) - y*y
+        ri = 0.5 * MATH_MANGLE(atan2)(2.0 * y, b.hi);
+        // a = ((1 - x)^2 + y^2) / d, written as 1 - 4x/d when x < d/8 (the two forms are algebraically equal)
+        double2 a;
+        double2 d = add(sqr(opx), y2); // (1 + x)^2 + y^2
+        if (x < 0x1.0p-3 * d.hi) {
+            a = fsub(1.0, div(4.0*x, d));
+        } else {
+            a = div(add(sqr(omx), y2), d);
+        }
+        rr = -0.25 * MATH_PRIVATE(lnep)(a, 0);
+    } else { // a magnitude >= 2^54, infinite, or NaN (the < tests fail)
+        int e = BUILTIN_FREXP_EXP_F64(BUILTIN_MAX_F64(x, y));
+        x = BUILTIN_FLDEXP_F64(x, -e); // x and y are rescaled in place; the checks below see the scaled values
+        y = BUILTIN_FLDEXP_F64(y, -e);
+        rr = BUILTIN_FLDEXP_F64(MATH_DIV(x, MATH_MAD(x, x, y*y)), -e); // mathematically X/(X^2 + Y^2) for the unscaled X, Y, assuming MATH_MAD(a,b,c) is a*b + c
+        ri = 0x1.921fb54442d18p+0; // pi/2
+    }
+    // Special-input overrides, applied only when !FINITE_ONLY_OPT(); later assignments win, signs are restored afterwards.
+    if (!FINITE_ONLY_OPT()) {
+        rr = ((x == 1.0) & (y == 0.0)) ? AS_DOUBLE(PINFBITPATT_DP64)  : rr;
+        rr = x == 0.0 ? 0.0 : rr;
+        rr = BUILTIN_ISINF_F64(x) ? 0.0 : rr;
+        rr = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISINF_F64(y)) ? 0.0 : rr;
+        ri = (BUILTIN_ISNAN_F64(x) & BUILTIN_ISFINITE_F64(y)) ? AS_DOUBLE(QNANBITPATT_DP64) : ri;
+        ri = BUILTIN_ISNAN_F64(y) ? y : ri;
+    }
+
+    rr = BUILTIN_COPYSIGN_F64(rr, z.x);
+    ri = BUILTIN_COPYSIGN_F64(ri, z.y);
+
+    return (double2)(rr, ri);
+}
+
diff --git a/ocml/src/catanhF.cl b/ocml/src/catanhF.cl
new file mode 100644
index 00000000..e0267eed
--- /dev/null
+++ b/ocml/src/catanhF.cl
@@ -0,0 +1,59 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+#define FLOAT_SPECIALIZATION
+#include "ep.h"
+
+extern CONSTATTR float MATH_PRIVATE(lnep)(float2 a, int ea);
+
+CONSTATTR float2
+MATH_MANGLE(catanh)(float2 z)
+{   // Complex atanh of z; returns (rr, ri), computed on |z.x|, |z.y| and re-signed from z at the end.
+    float x = BUILTIN_ABS_F32(z.x);
+    float y = BUILTIN_ABS_F32(z.y);
+    float rr, ri;
+
+    if (x < 0x1.0p+25f && y < 0x1.0p+25f) {
+        float2 omx = sub(1.0f, x);
+        float2 opx = add(1.0f, x);
+        float2 y2 = sqr(y);
+        float2 b = sub(mul(omx, opx), y2); // (1 - x)*(1 + x) - y*y
+        ri = 0.5f * MATH_MANGLE(atan2)(2.0f * y, b.hi);
+        // a = ((1 - x)^2 + y^2) / d, written as 1 - 4x/d when x < d/8 (the two forms are algebraically equal)
+        float2 a;
+        float2 d = add(sqr(opx), y2); // (1 + x)^2 + y^2
+        if (x < 0x1.0p-3f * d.hi) {
+            a = fsub(1.0f, div(4.0f*x, d));
+        } else {
+            a = div(add(sqr(omx), y2), d);
+        }
+        rr = -0.25f * MATH_PRIVATE(lnep)(a, 0);
+    } else { // a magnitude >= 2^25, infinite, or NaN (the < tests fail)
+        int e = BUILTIN_FREXP_EXP_F32(AS_FLOAT(BUILTIN_MAX_U32(AS_UINT(x), AS_UINT(y)))); // x, y are sign-free here, so the max is taken on their raw bit patterns
+        x = BUILTIN_FLDEXP_F32(x, -e); // x and y are rescaled in place; the checks below see the scaled values
+        y = BUILTIN_FLDEXP_F32(y, -e);
+        rr = BUILTIN_FLDEXP_F32(MATH_DIV(x, MATH_MAD(x, x, y*y)), -e); // mathematically X/(X^2 + Y^2) for the unscaled X, Y, assuming MATH_MAD(a,b,c) is a*b + c
+        ri = 0x1.921fb6p+0f; // pi/2 rounded to float
+    }
+    // Special-input overrides, applied only when !FINITE_ONLY_OPT(); later assignments win, signs are restored afterwards.
+    if (!FINITE_ONLY_OPT()) {
+        rr = ((x == 1.0f) & (y == 0.0f)) ? AS_FLOAT(PINFBITPATT_SP32)  : rr;
+        rr = x == 0.0f ? 0.0f : rr;
+        rr = BUILTIN_ISINF_F32(x) ? 0.0f : rr;
+        rr = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISINF_F32(y)) ? 0.0f : rr;
+        ri = (BUILTIN_ISNAN_F32(x) & BUILTIN_ISFINITE_F32(y)) ? AS_FLOAT(QNANBITPATT_SP32) : ri;
+        ri = BUILTIN_ISNAN_F32(y) ? y : ri;
+    }
+
+    rr = BUILTIN_COPYSIGN_F32(rr, z.x);
+    ri = BUILTIN_COPYSIGN_F32(ri, z.y);
+
+    return (float2)(rr, ri);
+}
+
diff --git a/ocml/src/ep.h b/ocml/src/ep.h
index e3bc73bc..3313a129 100644
--- a/ocml/src/ep.h
+++ b/ocml/src/ep.h
@@ -12,7 +12,8 @@
 #define ISINF(X) BUILTIN_ISINF_F32(X)
 #define USE_FMA HAVE_FAST_FMA32()
 #define HIGH(X) AS_FLOAT(AS_UINT(X) & 0xfffff000U)
-#define COPYSIGN BUILTIN_COPYSIGN_F64
+#define SIGNBIT(X) (AS_INT(X) < 0) /* true when the top bit of X's 32-bit pattern is set */
+#define SAMESIGN(X,Y) ((AS_INT(X)& 0x80000000) == (AS_INT(Y) & 0x80000000)) /* true when X and Y have equal top bits */
 #endif
 
 #if defined DOUBLE_SPECIALIZATION
@@ -26,7 +27,8 @@
 #define ISINF(X) BUILTIN_ISINF_F64(X)
 #define USE_FMA true
 #define HIGH(X) AS_DOUBLE(AS_ULONG(X) & 0xfffffffff8000000UL)
-#define COPYSIGN BUILTIN_COPYSIGN_F32
+#define SIGNBIT(X) (AS_INT2(X).hi < 0) /* tests the top bit of the .hi 32-bit word of X's pattern */
+#define SAMESIGN(X,Y) ((AS_INT2(X).hi & 0x80000000) == (AS_INT2(Y).hi & 0x80000000)) /* true when the .hi words of X and Y have equal top bits */
 #endif
 
 #if defined HALF_SPECIALIZATION
@@ -40,25 +42,26 @@
 #define ISINF(X) BUILTIN_ISINF_F16(X)
 #define USE_FMA true
 #define HIGH(X) AS_HALF(AS_USHORT(X) & (ushort)0xffc0U)
-#define COPYSIGN BUILTIN_COPYSIGN_F16
+#define SIGNBIT(X) (AS_SHORT(X) < (short)0) /* true when the top bit of X's 16-bit pattern is set */
+#define SAMESIGN(X,Y) ((AS_USHORT(X) & (ushort)0x8000) == (AS_USHORT(Y) & (ushort)0x8000)) /* true when X and Y have equal top bits */
 #endif
 
 static ATTR T2
-con(T a, T b)
+absv(T2 a)
 {
-    return (T2)(b, a);
+    return SIGNBIT(a.hi) ? -a : a; /* negate both words when the .hi word has its sign bit set */
 }
 
 static ATTR T2
-csgn(T2 a, T b)
+csgn(T2 a, T2 b)
 {
-    return con(COPYSIGN(a.hi, b), COPYSIGN(a.lo, b));
+    return SAMESIGN(a.hi, b.hi) ? a : -a; /* flip a as a whole when the sign bits of a.hi and b.hi differ */
 }
 
 static ATTR T2
-csgn(T2 a, T2 b)
+con(T a, T b)
 {
-    return (T2)(b, a);
+    return (T2)(b, a); /* b lands in component 0 (.lo), a in component 1 (.hi) */
 }
 
 static ATTR T2
@@ -430,7 +433,7 @@ root2(T a)
     T shi = SQRT(a);
     T2 e = fsub(a, sqr(shi));
     T slo = DIV(e.hi, (T)2 * shi);
-    return fadd(shi, slo);
+    return fadd(shi, a == (T)0 ? (T)0 : slo);
 }
 
 static ATTR T2
@@ -439,7 +442,7 @@ root2(T2 a)
     T shi = SQRT(a.hi);
     T2 e = fsub(a, sqr(shi));
     T slo = DIV(e.hi, (T)2 * shi);
-    return fadd(shi, slo);
+    return fadd(shi, a.hi == (T)0 ? (T)0 : slo);
 }
 
 #undef ATTR
@@ -454,4 +457,6 @@ root2(T2 a)
 #undef USE_FMA
 #undef HIGH
 #undef COPYSIGN
+#undef SIGNBIT
+#undef SAMESIGN
 
diff --git a/ocml/src/epcsqrtepD.cl b/ocml/src/epcsqrtepD.cl
new file mode 100644
index 00000000..ce95a7f9
--- /dev/null
+++ b/ocml/src/epcsqrtepD.cl
@@ -0,0 +1,26 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathD.h"
+
+#define DOUBLE_SPECIALIZATION
+#include "ep.h"
+
+CONSTATTR double4
+MATH_PRIVATE(epcsqrtep)(double4 z)
+{   // Takes two hi/lo pairs (z.lo, z.hi) and returns two; NOTE(review): presumably real/imaginary parts of a complex square root.
+    const double2 re = z.lo, im = z.hi;
+    const double2 mag = root2(add(sqr(re), sqr(im)));
+    const double2 u = root2(fadd(mag, absv(re)) * 0.5);
+    double2 v = absv(fdiv(im, u) * 0.5);
+    v = ((im.hi == 0.0) & (u.hi == 0.0)) ? im : v; // both leading words zero: reuse im instead of the quotient
+    const bool nonneg = re.hi >= 0.0;
+    const double2 first = nonneg ? u : v;
+    const double2 second = csgn(nonneg ? v : u, im); // second half takes the sign of im
+    return (double4)(first, second);
+}
+
diff --git a/ocml/src/epcsqrtepF.cl b/ocml/src/epcsqrtepF.cl
new file mode 100644
index 00000000..d8dcbd35
--- /dev/null
+++ b/ocml/src/epcsqrtepF.cl
@@ -0,0 +1,26 @@
+/*===--------------------------------------------------------------------------
+ *                   ROCm Device Libraries
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *===------------------------------------------------------------------------*/
+
+#include "mathF.h"
+
+#define FLOAT_SPECIALIZATION
+#include "ep.h"
+
+CONSTATTR float4
+MATH_PRIVATE(epcsqrtep)(float4 z)
+{   // Takes two hi/lo pairs (z.lo, z.hi) and returns two; NOTE(review): presumably real/imaginary parts of a complex square root.
+    const float2 re = z.lo, im = z.hi;
+    const float2 mag = root2(add(sqr(re), sqr(im)));
+    const float2 u = root2(fadd(mag, absv(re)) * 0.5f);
+    float2 v = absv(fdiv(im, u) * 0.5f);
+    v = ((im.hi == 0.0f) & (u.hi == 0.0f)) ? im : v; // both leading words zero: reuse im instead of the quotient
+    const bool nonneg = re.hi >= 0.0f;
+    const float2 first = nonneg ? u : v;
+    const float2 second = csgn(nonneg ? v : u, im); // second half takes the sign of im
+    return (float4)(first, second);
+}
+
diff --git a/ocml/src/hypotD.cl b/ocml/src/hypotD.cl
index 8c4f7db5..dffa6b70 100644
--- a/ocml/src/hypotD.cl
+++ b/ocml/src/hypotD.cl
@@ -17,7 +17,6 @@ MATH_MANGLE(hypot)(double x, double y)
     a = BUILTIN_FLDEXP_F64(a, -e);
     b = BUILTIN_FLDEXP_F64(b, -e);
     double ret = BUILTIN_FLDEXP_F64(MATH_FAST_SQRT(MATH_MAD(a, a, b*b)), e);
-    ret = t == 0.0 ? 0.0 : ret;
 
     if (!FINITE_ONLY_OPT()) {
         ret = BUILTIN_ISNAN_F64(x) |
diff --git a/ocml/src/len3D.cl b/ocml/src/len3D.cl
index 3a95b261..dbe747b8 100644
--- a/ocml/src/len3D.cl
+++ b/ocml/src/len3D.cl
@@ -29,7 +29,6 @@ MATH_MANGLE(len3)(double x, double y, double z)
     c = BUILTIN_FLDEXP_F64(c, -e);
 
     double ret = BUILTIN_FLDEXP_F64(MATH_FAST_SQRT(MATH_MAD(a, a, MATH_MAD(b, b, c*c))), e);
-    ret = a == 0.0 ? 0.0 : ret;
 
     if (!FINITE_ONLY_OPT()) {
         ret = (BUILTIN_ISNAN_F64(x) |
diff --git a/ocml/src/len4D.cl b/ocml/src/len4D.cl
index 4047a729..71c559a5 100644
--- a/ocml/src/len4D.cl
+++ b/ocml/src/len4D.cl
@@ -37,7 +37,6 @@ MATH_MANGLE(len4)(double x, double y, double z, double w)
     d = BUILTIN_FLDEXP_F64(d, -e);
 
     double ret = BUILTIN_FLDEXP_F64(MATH_FAST_SQRT(MATH_MAD(a, a, MATH_MAD(b, b, MATH_MAD(c, c, d*d)))), e);
-    ret = a == 0.0 ? 0.0 : ret;
 
     if (!FINITE_ONLY_OPT()) {
         ret = (BUILTIN_ISNAN_F64(x) | BUILTIN_ISNAN_F64(y) |
diff --git a/ocml/src/privD.h b/ocml/src/privD.h
index ff6cad46..d73bfc10 100644
--- a/ocml/src/privD.h
+++ b/ocml/src/privD.h
@@ -57,6 +57,7 @@
     double _fsqrt_s1 = BUILTIN_FMA_F64(_fsqrt_s0, _fsqrt_r0, _fsqrt_s0); \
     double _fsqrt_d0 = BUILTIN_FMA_F64(-_fsqrt_s1, _fsqrt_s1, _fsqrt_x); \
     double _fsqrt_ret = BUILTIN_FMA_F64(_fsqrt_d0, _fsqrt_h1, _fsqrt_s1); \
+    _fsqrt_ret = _fsqrt_x == 0.0 ? _fsqrt_x : _fsqrt_ret; \
     _fsqrt_ret; \
 })