Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
Merge pull request #55 from RadeonOpenCompute/roc-1.7.1
Browse files Browse the repository at this point in the history
roc-1.7.1 updates
  • Loading branch information
kzhuravl authored Mar 23, 2018
2 parents 738fe62 + def7b2c commit 1f1750f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
11 changes: 11 additions & 0 deletions oclc/src/isa_version_902.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*===--------------------------------------------------------------------------
* ROCm Device Libraries
*
* This file is distributed under the University of Illinois Open Source
* License. See LICENSE.TXT for details.
*===------------------------------------------------------------------------*/

#include "oclc.h"

/* Returns the ISA version constant carried by this file: 902. */
__attribute__((always_inline, const)) int
__oclc_ISA_version(void)
{
    return 902;
}

39 changes: 35 additions & 4 deletions opencl/src/common/fclamp.cl
Original file line number Diff line number Diff line change
@@ -5,21 +5,35 @@
* License. See LICENSE.TXT for details.
*===------------------------------------------------------------------------*/

#include "ockl.h"

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * Attribute list placed in front of every clamp definition in this file:
 * always_inline, overloadable and const. The overloadable attribute is what
 * allows the several clamp signatures below to share a single name.
 */
#define ATTR __attribute__((always_inline, overloadable, const))

/*
 * Comma-separated, per-component clamp calls for the overload whose bounds
 * are vectors: component i of x is clamped against component i of lo and of
 * hi. Each VLISTn builds on the next smaller list, so VLIST16 covers .s0
 * through .sf. The lists use the identifiers x, lo and hi directly, so those
 * names must be in scope wherever a list is expanded.
 */
#define VLIST2 \
    clamp(x.s0, lo.s0, hi.s0), \
    clamp(x.s1, lo.s1, hi.s1)
#define VLIST3 \
    VLIST2, \
    clamp(x.s2, lo.s2, hi.s2)
#define VLIST4 \
    VLIST3, \
    clamp(x.s3, lo.s3, hi.s3)
#define VLIST8 \
    VLIST4, \
    clamp(x.s4, lo.s4, hi.s4), \
    clamp(x.s5, lo.s5, hi.s5), \
    clamp(x.s6, lo.s6, hi.s6), \
    clamp(x.s7, lo.s7, hi.s7)
#define VLIST16 \
    VLIST8, \
    clamp(x.s8, lo.s8, hi.s8), \
    clamp(x.s9, lo.s9, hi.s9), \
    clamp(x.sa, lo.sa, hi.sa), \
    clamp(x.sb, lo.sb, hi.sb), \
    clamp(x.sc, lo.sc, hi.sc), \
    clamp(x.sd, lo.sd, hi.sd), \
    clamp(x.se, lo.se, hi.se), \
    clamp(x.sf, lo.sf, hi.sf)

/*
 * Comma-separated, per-component clamp calls for the overload whose bounds
 * are scalars: every component of x is clamped against the same lo and hi.
 * Each LISTn builds on the next smaller list, so LIST16 covers .s0 through
 * .sf. The lists use the identifiers x, lo and hi directly, so those names
 * must be in scope wherever a list is expanded.
 */
#define LIST2 \
    clamp(x.s0, lo, hi), \
    clamp(x.s1, lo, hi)
#define LIST3 \
    LIST2, \
    clamp(x.s2, lo, hi)
#define LIST4 \
    LIST3, \
    clamp(x.s3, lo, hi)
#define LIST8 \
    LIST4, \
    clamp(x.s4, lo, hi), \
    clamp(x.s5, lo, hi), \
    clamp(x.s6, lo, hi), \
    clamp(x.s7, lo, hi)
#define LIST16 \
    LIST8, \
    clamp(x.s8, lo, hi), \
    clamp(x.s9, lo, hi), \
    clamp(x.sa, lo, hi), \
    clamp(x.sb, lo, hi), \
    clamp(x.sc, lo, hi), \
    clamp(x.sd, lo, hi), \
    clamp(x.se, lo, hi), \
    clamp(x.sf, lo, hi)

/*
 * GENN(N,T) defines two clamp overloads for the N-component vector type T##N:
 * one whose lo/hi bounds are scalars of type T, and one whose bounds are
 * T##N vectors.
 *
 * NOTE(review): each body below carries two consecutive return statements,
 * so as written the fmin(fmax(x, lo), hi) line returns and the LIST##N or
 * VLIST##N line after it is unreachable. This looks like a rendered diff
 * whose +/- markers were lost (old line followed by its replacement);
 * confirm against the committed file which single return is intended.
 */
#define GENN(N,T) \
ATTR T##N \
clamp(T##N x, T lo, T hi) \
{ \
return fmin(fmax(x, lo), hi); \
return (T##N)( LIST##N ); \
} \
\
ATTR T##N \
clamp(T##N x, T##N lo, T##N hi) \
{ \
return fmin(fmax(x, lo), hi); \
return (T##N) ( VLIST##N ); \
}

#define GEN1(T) \
@@ -34,10 +48,27 @@ clamp(T x, T lo, T hi) \
GENN(8,T) \
GENN(4,T) \
GENN(3,T) \
GENN(2,T) \
GEN1(T)
GENN(2,T)

/* Expand GEN once per element type: float, double and half. */
GEN(float)
GEN(double)
GEN(half)

/*
 * Scalar float clamp.
 * Forwards x, lo and hi unchanged to __ockl_median3_f32 and returns its result.
 * NOTE(review): presumably the median of the three operands, which equals the
 * clamped value whenever lo <= hi -- confirm against the ockl definition.
 */
ATTR float
clamp(float x, float lo, float hi)
{
    const float clamped = __ockl_median3_f32(x, lo, hi);
    return clamped;
}

/*
 * Scalar double clamp.
 * Takes fmax of x and lo first, then fmin of that intermediate and hi.
 */
ATTR double
clamp(double x, double lo, double hi)
{
    const double raised = fmax(x, lo);
    return fmin(raised, hi);
}

/*
 * Scalar half clamp.
 * Forwards x, lo and hi unchanged to __ockl_median3_f16 and returns its result.
 * NOTE(review): presumably the median of the three operands, which equals the
 * clamped value whenever lo <= hi -- confirm against the ockl definition.
 */
ATTR half
clamp(half x, half lo, half hi)
{
    const half clamped = __ockl_median3_f16(x, lo, hi);
    return clamped;
}

0 comments on commit 1f1750f

Please sign in to comment.