exponential polynomial approximation #1346

ZvRzyan18 · 2025-02-20T07:39:42Z

This pull request introduces a new implementations of glm::fastPow, glm::fastExp, glm::fastExp2, glm::fastLog, glm::fastLog2. These functions are numerically stable, even for large values.

Btw. I made a pull request before #1332 with the same purpose, but I closed it because I was still trying to find a more efficient code.

ALGORITHM

In this approximation I used Horner's method for polynomial evaluation ((((C0 * x + C1) * x + C2) * x + C3) * x + C4)
where C0 - Cn are coefficents calculated using the Remez Algorithm.
exp2() interval [0, 1], one and two degree polynomial (exp2 works more efficiently in IEEE 754 float)
log() interval[1, 2], two degree polynomial
pow(), uses 4 degree polynomial for both exp and log

SPECIAL CASES

these are not implemented for performance reasons

• glm::fastPow

If y is negative

1 / glm::fastPow(x, abs(-y));

If x is less than 1 (e.g 0.4, 0.5...)

1 / glm::fastPow(1 / x, y);

• glm::fastExp, glm::fastExp2

If x is negative

1 / glm::fastExp(abs(-x));

• glm::fastLog, glm::fastLog2

If x is less than 1 (e.g 0.7, 0.8...)

-glm::fastLog(1 / x);

NEED MORE ACCURACY?

This code is bit slower than this current pr implementation, but its accurate up to 1e-4 (0.0001%) error

float m_exp(float x) {
 //6 degree polynomial coefficients exp2(), interval [0, 1]
 const float EXP1 = 0.00021889229026322152f;
 const float EXP2 = 0.0012384303497239802f;
 const float EXP3 = 0.0096849763219601596f;
 const float EXP4 = 0.055480220997567851f;
 const float EXP5 = 0.24023055018754644f;
 const float EXP6 = 0.69314692456439386f;
 const float EXP7 = 0.10000000026442721e+1f;

/*
 //only for double
 const double max_threshold =  7.09782712893383973096e+02;
 const double min_threshold = -7.45133219101941108420e+02;
*/
 const float max_threshold = 8.8721679688e+01f;
 const float min_threshold = -1.0397208405e+02f;

 const float INV_LN2 = 1.44269504088896338700f;
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;
 
 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return -INFINITY;

 //just make sure no overflow and underflow
 if(x > max_threshold) return INFINITY;
 if(x < min_threshold) return -INFINITY;
 
 //return reciprocal if negative
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return 1.0f / m_exp(-x);
 
 x *= INV_LN2; //x *= (1/M_LN2)

 //return scalb(horners_method(x - floor(x)), floor(x))
 uint32_t out = ((uint32_t)(FLOAT_BIAS + uint32_t(x)) << FLOAT_SIGNIFICANT_BIT);
 x -= uint32_t(x);
 if((*(uint32_t*)&x) != 0)
  return (*(float*)(&out)) *
   ((((((EXP1 * x + EXP2) * x + EXP3) * x + EXP4) * x + EXP5) * x + EXP6) * x + EXP7);
 
 return (*(float*)(&out));
}


float m_log(float x) {
 //8 degree polynomial coefficients  log(), interval [1, 2]
 const float LOG1 = -0.0062999510270349574f;
 const float LOG2 = 0.08584329981425072f;
 const float LOG3 = 0.51872183729493937f;
 const float LOG4 = 1.8290586673684612f;
 const float LOG5 = 4.168193859532221f;
 const float LOG6 = 6.4365534020567452f;
 const float LOG7 = 6.9364175845344764f;
 const float LOG8 = 5.6524794507321916f;
 const float LOG9 = 2.3743015582528564f;
	
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;

 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return -INFINITY;
 
 //return nan if negative
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return NAN;
	
 if(((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT) < FLOAT_BIAS) // x < 1.0f
	 return -m_log(1.0f / x);
 
 //exponent = 0
 //mantissa = frexp(x, &exponent)
 //return exponent * M_LN2 + horners_method(mantissa)
 uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
 float mx = *(float*)&mantissa;
 return (((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT)-FLOAT_BIAS) * float(0.69314718055994528623) + 
 ((((((((LOG1 * mx + LOG2) * mx - LOG3) * mx + LOG4) * mx - LOG5) * mx + LOG6) * mx - LOG7) * mx + LOG8) * mx - LOG9);
}

ERROR

• glm::fastPow

x = 6.7
y = 0.00, error : 0.24760962%
y = 1.00, error : 0.14139716%
y = 2.00, error : 0.03850524%
y = 3.00, error : 0.22298415%
y = 4.00, error : 0.09447852%
y = 5.00, error : 0.11734231%
y = 6.00, error : 0.08545813%
y = 7.00, error : 0.23790078%
y = 8.00, error : 0.07089398%
y = 9.00, error : 0.08944029%
y = 10.00, error : 0.13199883%
y = 11.00, error : 0.24625938%
y = 12.00, error : 0.05305298%
y = 13.00, error : 0.05824120%
y = 14.00, error : 0.17782241%
y = 15.00, error : 0.24689905%
y = 16.00, error : 0.04042700%
y = 17.00, error : 0.02405633%
y = 18.00, error : 0.22297528%
y = 19.00, error : 0.23925050%
y = 20.00, error : 0.03312265%

• glm::fastExp

x = 0.00, error : 4.30356860%
x = 1.00, error : 2.98118323%
x = 2.00, error : 0.26578372%
x = 3.00, error : 2.36613181%
x = 4.00, error : 1.26317261%
x = 5.00, error : 0.94539768%
x = 6.00, error : 2.36313024%
x = 7.00, error : 1.41008692%
x = 8.00, error : 2.95274340%
x = 9.00, error : 1.87398441%
x = 10.00, error : 2.94006676%
x = 11.00, error : 0.03307902%
x = 12.00, error : 2.22139533%
x = 13.00, error : 1.44142235%
x = 14.00, error : 0.68031238%
x = 15.00, error : 2.47628319%
x = 16.00, error : 1.81461304%
x = 17.00, error : 2.98891599%
x = 18.00, error : 1.60186663%
x = 19.00, error : 2.88561219%
x = 20.00, error : 0.19276333%

• glm::fastExp2

x = 0.00, error : 0.24760962%
x = 1.00, error : 0.24760962%
x = 2.00, error : 0.24760962%
x = 3.00, error : 0.24760962%
x = 4.00, error : 0.24760962%
x = 5.00, error : 0.24760962%
x = 6.00, error : 0.24760962%
x = 7.00, error : 0.24760962%
x = 8.00, error : 0.24760962%
x = 9.00, error : 0.24760962%
x = 10.00, error : 0.24760962%
x = 11.00, error : 0.24760962%
x = 12.00, error : 0.24760962%
x = 13.00, error : 0.24760962%
x = 14.00, error : 0.24760962%
x = 15.00, error : 0.24760962%
x = 16.00, error : 0.24760962%
x = 17.00, error : 0.24760962%
x = 18.00, error : 0.24760962%
x = 19.00, error : 0.24760962%
x = 20.00, error : 0.24760962%

• glm::fastLog

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49396884%
x = 3.00, error : 0.07884442%
x = 4.00, error : 0.24698456%
x = 5.00, error : 0.20669479%
x = 6.00, error : 0.04834334%
x = 7.00, error : 0.17220324%
x = 8.00, error : 0.16465646%
x = 9.00, error : 0.11049288%
x = 10.00, error : 0.14447338%
x = 11.00, error : 0.06858298%
x = 12.00, error : 0.03485839%
x = 13.00, error : 0.11212541%
x = 14.00, error : 0.12697873%
x = 15.00, error : 0.05432294%
x = 16.00, error : 0.12349242%
x = 17.00, error : 0.01063724%
x = 18.00, error : 0.08399524%
x = 19.00, error : 0.11310391%
x = 20.00, error : 0.11104532%

• glm::fastLog2

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49397945%
x = 3.00, error : 0.07884461%
x = 4.00, error : 0.24698973%
x = 5.00, error : 0.20669022%
x = 6.00, error : 0.04834335%
x = 7.00, error : 0.17220546%
x = 8.00, error : 0.16465982%
x = 9.00, error : 0.11048829%
x = 10.00, error : 0.14447026%
x = 11.00, error : 0.06859365%
x = 12.00, error : 0.03485831%
x = 13.00, error : 0.11212646%
x = 14.00, error : 0.12697578%
x = 15.00, error : 0.05433159%
x = 16.00, error : 0.12350082%
x = 17.00, error : 0.01062610%
x = 18.00, error : 0.08398610%
x = 19.00, error : 0.11310125%
x = 20.00, error : 0.11103747%

PERFORMANCE TEST

aarch 64

time : 52690 std::pow
time : 42250 glm::fastPow

time : 26476 std::exp
time : 18128 glm::fastExp

time : 23330 std::exp2
time : 20809 glm::fastExp2

time : 25646 std::log
time : 19567 glm::fastLog

time : 23910 std::log2
time : 17782 glm::fastLog2

x86_64

time : 509 std::pow
time : 110 glm::fastPow

time : 171 std::exp
time : 67 glm::fastExp

time : 125 std::exp2
time : 48 glm::fastExp2

time : 220 std::log
time : 39 glm::fastLog

time : 140 std::log2
time : 44 glm::fastLog2

glm::fastPow, glm::fastExp, glm::fastLog polynomial approximation

1e94e6d

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

exponential polynomial approximation #1346

exponential polynomial approximation #1346

ZvRzyan18 commented Feb 20, 2025 •

edited

Loading

exponential polynomial approximation #1346

Are you sure you want to change the base?

exponential polynomial approximation #1346

Conversation

ZvRzyan18 commented Feb 20, 2025 • edited Loading

ALGORITHM

SPECIAL CASES

• glm::fastPow

• glm::fastExp, glm::fastExp2

• glm::fastLog, glm::fastLog2

NEED MORE ACCURACY?

ERROR

PERFORMANCE TEST

ZvRzyan18 commented Feb 20, 2025 •

edited

Loading