Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exponential polynomial approximation #1346

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

ZvRzyan18
Copy link

@ZvRzyan18 ZvRzyan18 commented Feb 20, 2025

This pull request introduces a new implementations of glm::fastPow, glm::fastExp, glm::fastExp2, glm::fastLog, glm::fastLog2. These functions are numerically stable, even for large values.

Btw. I made a pull request before #1332 with the same purpose, but I closed it because I was still trying to find a more efficient code.

ALGORITHM

In this approximation I used Horner's method for polynomial evaluation ((((C0 * x + C1) * x + C2) * x + C3) * x + C4)
where C0 - Cn are coefficents calculated using the Remez Algorithm.
exp2() interval [0, 1], one and two degree polynomial (exp2 works more efficiently in IEEE 754 float)
log() interval[1, 2], two degree polynomial
pow(), uses 4 degree polynomial for both exp and log

SPECIAL CASES

these are not implemented for performance reasons

• glm::fastPow

If y is negative

1 / glm::fastPow(x, abs(-y));

If x is less than 1 (e.g 0.4, 0.5...)

1 / glm::fastPow(1 / x, y);

• glm::fastExp, glm::fastExp2

If x is negative

1 / glm::fastExp(abs(-x));

• glm::fastLog, glm::fastLog2

If x is less than 1 (e.g 0.7, 0.8...)

-glm::fastLog(1 / x);

NEED MORE ACCURACY?

This code is bit slower than this current pr implementation, but its accurate up to 1e-4 (0.0001%) error

float m_exp(float x) {
 //6 degree polynomial coefficients exp2(), interval [0, 1]
 const float EXP1 = 0.00021889229026322152f;
 const float EXP2 = 0.0012384303497239802f;
 const float EXP3 = 0.0096849763219601596f;
 const float EXP4 = 0.055480220997567851f;
 const float EXP5 = 0.24023055018754644f;
 const float EXP6 = 0.69314692456439386f;
 const float EXP7 = 0.10000000026442721e+1f;

/*
 //only for double
 const double max_threshold =  7.09782712893383973096e+02;
 const double min_threshold = -7.45133219101941108420e+02;
*/
 const float max_threshold = 8.8721679688e+01f;
 const float min_threshold = -1.0397208405e+02f;

 const float INV_LN2 = 1.44269504088896338700f;
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;
 
 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return -INFINITY;

 //just make sure no overflow and underflow
 if(x > max_threshold) return INFINITY;
 if(x < min_threshold) return -INFINITY;
 
 //return reciprocal if negative
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return 1.0f / m_exp(-x);
 
 x *= INV_LN2; //x *= (1/M_LN2)

 //return scalb(horners_method(x - floor(x)), floor(x))
 uint32_t out = ((uint32_t)(FLOAT_BIAS + uint32_t(x)) << FLOAT_SIGNIFICANT_BIT);
 x -= uint32_t(x);
 if((*(uint32_t*)&x) != 0)
  return (*(float*)(&out)) *
   ((((((EXP1 * x + EXP2) * x + EXP3) * x + EXP4) * x + EXP5) * x + EXP6) * x + EXP7);
 
 return (*(float*)(&out));
}


float m_log(float x) {
 //8 degree polynomial coefficients  log(), interval [1, 2]
 const float LOG1 = -0.0062999510270349574f;
 const float LOG2 = 0.08584329981425072f;
 const float LOG3 = 0.51872183729493937f;
 const float LOG4 = 1.8290586673684612f;
 const float LOG5 = 4.168193859532221f;
 const float LOG6 = 6.4365534020567452f;
 const float LOG7 = 6.9364175845344764f;
 const float LOG8 = 5.6524794507321916f;
 const float LOG9 = 2.3743015582528564f;
	
 const uint32_t FLOAT_BIAS = 127;
 const uint32_t FLOAT_SIGNIFICANT_BIT = 23;
 const uint32_t SIGN_BIT_MASK = 0x80000000;

 if(x != x) return NAN;
 if(x == INFINITY) return INFINITY;
 if(x == -INFINITY) return -INFINITY;
 
 //return nan if negative
 if((*(uint32_t*)&x) & SIGN_BIT_MASK) // x < 0.0f
  return NAN;
	
 if(((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT) < FLOAT_BIAS) // x < 1.0f
	 return -m_log(1.0f / x);
 
 //exponent = 0
 //mantissa = frexp(x, &exponent)
 //return exponent * M_LN2 + horners_method(mantissa)
 uint32_t mantissa = 1065353216U | ((*(uint32_t*)&x) & 0x007FFFFF);
 float mx = *(float*)&mantissa;
 return (((*(uint32_t*)&x) >> FLOAT_SIGNIFICANT_BIT)-FLOAT_BIAS) * float(0.69314718055994528623) + 
 ((((((((LOG1 * mx + LOG2) * mx - LOG3) * mx + LOG4) * mx - LOG5) * mx + LOG6) * mx - LOG7) * mx + LOG8) * mx - LOG9);
}

ERROR

• glm::fastPow

x = 6.7
y = 0.00, error : 0.24760962%
y = 1.00, error : 0.14139716%
y = 2.00, error : 0.03850524%
y = 3.00, error : 0.22298415%
y = 4.00, error : 0.09447852%
y = 5.00, error : 0.11734231%
y = 6.00, error : 0.08545813%
y = 7.00, error : 0.23790078%
y = 8.00, error : 0.07089398%
y = 9.00, error : 0.08944029%
y = 10.00, error : 0.13199883%
y = 11.00, error : 0.24625938%
y = 12.00, error : 0.05305298%
y = 13.00, error : 0.05824120%
y = 14.00, error : 0.17782241%
y = 15.00, error : 0.24689905%
y = 16.00, error : 0.04042700%
y = 17.00, error : 0.02405633%
y = 18.00, error : 0.22297528%
y = 19.00, error : 0.23925050%
y = 20.00, error : 0.03312265%

• glm::fastExp

x = 0.00, error : 4.30356860%
x = 1.00, error : 2.98118323%
x = 2.00, error : 0.26578372%
x = 3.00, error : 2.36613181%
x = 4.00, error : 1.26317261%
x = 5.00, error : 0.94539768%
x = 6.00, error : 2.36313024%
x = 7.00, error : 1.41008692%
x = 8.00, error : 2.95274340%
x = 9.00, error : 1.87398441%
x = 10.00, error : 2.94006676%
x = 11.00, error : 0.03307902%
x = 12.00, error : 2.22139533%
x = 13.00, error : 1.44142235%
x = 14.00, error : 0.68031238%
x = 15.00, error : 2.47628319%
x = 16.00, error : 1.81461304%
x = 17.00, error : 2.98891599%
x = 18.00, error : 1.60186663%
x = 19.00, error : 2.88561219%
x = 20.00, error : 0.19276333%

• glm::fastExp2

x = 0.00, error : 0.24760962%
x = 1.00, error : 0.24760962%
x = 2.00, error : 0.24760962%
x = 3.00, error : 0.24760962%
x = 4.00, error : 0.24760962%
x = 5.00, error : 0.24760962%
x = 6.00, error : 0.24760962%
x = 7.00, error : 0.24760962%
x = 8.00, error : 0.24760962%
x = 9.00, error : 0.24760962%
x = 10.00, error : 0.24760962%
x = 11.00, error : 0.24760962%
x = 12.00, error : 0.24760962%
x = 13.00, error : 0.24760962%
x = 14.00, error : 0.24760962%
x = 15.00, error : 0.24760962%
x = 16.00, error : 0.24760962%
x = 17.00, error : 0.24760962%
x = 18.00, error : 0.24760962%
x = 19.00, error : 0.24760962%
x = 20.00, error : 0.24760962%

• glm::fastLog

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49396884%
x = 3.00, error : 0.07884442%
x = 4.00, error : 0.24698456%
x = 5.00, error : 0.20669479%
x = 6.00, error : 0.04834334%
x = 7.00, error : 0.17220324%
x = 8.00, error : 0.16465646%
x = 9.00, error : 0.11049288%
x = 10.00, error : 0.14447338%
x = 11.00, error : 0.06858298%
x = 12.00, error : 0.03485839%
x = 13.00, error : 0.11212541%
x = 14.00, error : 0.12697873%
x = 15.00, error : 0.05432294%
x = 16.00, error : 0.12349242%
x = 17.00, error : 0.01063724%
x = 18.00, error : 0.08399524%
x = 19.00, error : 0.11310391%
x = 20.00, error : 0.11104532%

• glm::fastLog2

x = 0.00, error : nan
x = 1.00, error : inf
x = 2.00, error : 0.49397945%
x = 3.00, error : 0.07884461%
x = 4.00, error : 0.24698973%
x = 5.00, error : 0.20669022%
x = 6.00, error : 0.04834335%
x = 7.00, error : 0.17220546%
x = 8.00, error : 0.16465982%
x = 9.00, error : 0.11048829%
x = 10.00, error : 0.14447026%
x = 11.00, error : 0.06859365%
x = 12.00, error : 0.03485831%
x = 13.00, error : 0.11212646%
x = 14.00, error : 0.12697578%
x = 15.00, error : 0.05433159%
x = 16.00, error : 0.12350082%
x = 17.00, error : 0.01062610%
x = 18.00, error : 0.08398610%
x = 19.00, error : 0.11310125%
x = 20.00, error : 0.11103747%

PERFORMANCE TEST

  • aarch 64

time : 52690 std::pow
time : 42250 glm::fastPow

time : 26476 std::exp
time : 18128 glm::fastExp

time : 23330 std::exp2
time : 20809 glm::fastExp2

time : 25646 std::log
time : 19567 glm::fastLog

time : 23910 std::log2
time : 17782 glm::fastLog2

  • x86_64

time : 509 std::pow
time : 110 glm::fastPow

time : 171 std::exp
time : 67 glm::fastExp

time : 125 std::exp2
time : 48 glm::fastExp2

time : 220 std::log
time : 39 glm::fastLog

time : 140 std::log2
time : 44 glm::fastLog2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant