gml_Math.h

Go to the documentation of this file.
00001 /** @file gml_Math.h
00002  * 
00003  *     Overloaded and optimized math functions.
00004  *     Probably no performance improvement will occur if inlining is disabled.
00005  * 
00006  *   Copyright (c) 2003-2004 CLIPS-IMAG
00007  * 
00008  *   See the file "gml_LicenseTerms.txt" for information on usage and redistribution
00009  *   of this file, and for a DISCLAIMER OF ALL WARRANTIES.
00010  * 
00011  *   Created on November 20, 2003 (FB).
00012  */
00013 #ifndef __GML_MATH__
00014 #define __GML_MATH__
00015 
00016 #include <math.h>
00017 #include <assert.h>
00018 #include <stdlib.h>
00019 
00020 #include "gml/base/gml_Types.h"
00021 
00022 #if defined(__POWERPC__) && defined(HAVE_PPC_INTRINSICS_H)
00023 #  include <ppc_intrinsics.h>
00024 #else
00025 #  undef __GML_MATH_USE_PPC_INTRINSICS
00026 #endif
00027 
00028 #ifndef __cplusplus
00029 #error C++ is required for this file
00030 #endif
00031 
00032 /// @name Overloaded mathematical functions.
00033 ///       Standard C library mathematical functions, 
00034 ///       overloaded to accept any precision.
00035 //@{
00036 inline Float32  Cos     (Float32 a)            { return cosf (a);      }
00037 inline Float64  Cos     (Float64 a)            { return cos (a);       }
00038 inline Float32  Acos    (Float32 a)            { return acosf (a);     }
00039 inline Float64  Acos    (Float64 a)            { return acos (a);      }
00040 inline Float32  Sin     (Float32 a)            { return sinf (a);      }
00041 inline Float64  Sin     (Float64 a)            { return sin (a);       }
00042 inline Float32  Asin    (Float32 a)            { return asinf (a);     }
00043 inline Float64  Asin    (Float64 a)            { return asin (a);      }
00044 inline Float32  Tan     (Float32 a)            { return tanf (a);      }
00045 inline Float64  Tan     (Float64 a)            { return tan (a);       }
00046 inline Float32  Atan    (Float32 a)            { return atanf (a);     }
00047 inline Float64  Atan    (Float64 a)            { return atan (a);      }
00048 inline Float32  Atan2   (Float32 y, Float32 x) { return atan2f (y, x); }
00049 inline Float64  Atan2   (Float64 y, Float64 x) { return atan2 (y, x);  }
00050 inline Float32  Hypot   (Float32 y, Float32 x) { return hypotf (y, x); }
00051 inline Float64  Hypot   (Float64 y, Float64 x) { return hypot (y, x);  }
00052 inline Float32  Exp     (Float32 a)            { return expf (a);      }
00053 inline Float64  Exp     (Float64 a)            { return exp (a);       }
00054 inline Float32  Exp2    (Float32 a)            { return exp2f (a);     }
00055 inline Float64  Exp2    (Float64 a)            { return exp2 (a);      }
00056 
00057 inline UInt8    Abs     (SInt8   a)            { return labs (a);      }
00058 inline UInt16   Abs     (SInt16  a)            { return labs (a);      }
00059 inline UInt32   Abs     (SInt32  a)            { return labs (a);      }
00060 inline UInt64   Abs     (SInt64  a)            { return llabs (a);     }
00061 inline Float32  Abs     (Float32 a)            { return fabsf (a);     }
00062 inline Float64  Abs     (Float64 a)            { return fabs (a);      }
00063 inline Float32  Sqrt    (Float32 a);
00064 inline Float64  Sqrt    (Float64 a);
00065 inline Float32  Cbrt    (Float32 a)            { return cbrtf (a);     }
00066 inline Float64  Cbrt    (Float64 a)            { return cbrt (a);      }
00067 inline Float32  Log1p   (Float32 a)            { return log1p (a);     }
00068 inline Float64  Log1p   (Float64 a)            { return log1pf (a);    }
00069 inline Float32  Log     (Float32 a)            { return log (a);       }
00070 inline Float64  Log     (Float64 a)            { return logf (a);      }
00071 
00072 /// Count the digits in <a>.
00073 inline UInt8    Log2    (UInt32 a);
00074 //@}
00075 
00076 /// @name Not A Number functions
00077 ///       Generate and check NaNs.
00078 //@{
00079 inline void     Nan     (Float32 &a)           { a = nanf ("");        }
00080 inline void     Nan     (Float64 &a)           { a = nan  ("");        }
00081 inline int      Isnan   (Float32 a)            { return isnan (a);     }
00082 inline int      Isnan   (Float64 a)            { return isnan (a);     }
00083 //@}
00084 
00085 /// @name Optimized selection functions
00086 ///       Select() is equivalent to:  (a < b) ? alt1 : alt2
00087 ///       Whenever possible, Select() is implemented without branching.
00088 //@{
00089 inline UInt8   Select (  UInt8 a,   UInt8 b,   UInt8 alt1,   UInt8 alt2);
00090 inline SInt8   Select (  SInt8 a,   SInt8 b,   SInt8 alt1,   SInt8 alt2);
00091 inline UInt16  Select ( UInt16 a,  UInt16 b,  UInt16 alt1,  UInt16 alt2);
00092 inline SInt16  Select ( SInt16 a,  SInt16 b,  SInt16 alt1,  SInt16 alt2);
00093 inline UInt32  Select ( UInt32 a,  UInt32 b,  UInt32 alt1,  UInt32 alt2);
00094 inline SInt32  Select ( SInt32 a,  SInt32 b,  SInt32 alt1,  SInt32 alt2);
00095 inline UInt64  Select ( UInt64 a,  UInt64 b,  UInt64 alt1,  UInt64 alt2);
00096 inline SInt64  Select ( SInt64 a,  SInt64 b,  SInt64 alt1,  SInt64 alt2);
00097 
00098 inline Float32 Select (Float32 a, Float32 b, Float32 alt1, Float32 alt2);
00099 inline Float64 Select (Float64 a, Float64 b, Float64 alt1, Float64 alt2);
00100 //@}
00101 
00102 /// @name Fast minimum, maximum, clamping and clipping functions
00103 /// These functions use the optimized Select() flavours above to acheive performance.
00104 /// They work with any integer or floating-point type.
00105 //@{
00106 
00107 template <typename T> inline T Min   (T n, T p);
00108 template <typename T> inline T Max   (T n, T p);
00109 template <typename T> inline T Min   (T a, T b, T c);
00110 template <typename T> inline T Max   (T a, T b, T c);
00111 template <typename T> inline T Clamp (T v, T min, T max);
00112 template <typename T> inline T Clip  (T v, T min, T max);
00113 
00114 //@}
00115 
00116 /// @name Rounding
00117 //@{
00118 inline UInt32  LFloor (Float32 a);
00119 inline UInt32  LFloor (Float64 a);
00120 inline SInt32  LRound (Float32 d);
00121 inline Float32 Floor  (Float32 a)           { return floorf (a);       }
00122 inline Float64 Floor  (Float64 a)           { return floor (a);        }
00123 inline Float32 Ceil   (Float32 a)           { return ceilf (a);        }
00124 inline Float64 Ceil   (Float64 a)           { return ceil (a);         }
00125 //@}
00126 
00127 /// Floating-point division
00128 inline Float32 Divide (Float32 x, Float32 y);
00129 
00130 /// Average of three integers
00131 inline UInt32  Average (UInt32 a, UInt32 b, UInt32 c);
00132 
00133 /// Integer square root
00134 inline UInt16  Sqrt (UInt32 a);
00135 
00136 /// @name Euclidian vector norm
00137 //@{
00138 inline UInt32  Norm (UInt32  a,  UInt32  b);
00139 inline Float32 Norm (Float32 a,  Float32 b);
00140 inline UInt32  Norm (UInt32  a1, UInt32  b1, UInt32  a2, UInt32  b2);
00141 inline Float32 Norm (Float32 a1, Float32 b1, Float32 a2, Float32 b2);
00142 //@}
00143 
00144 /// @name Square function
00145 //@{
00146 inline UInt32  Sqr (SInt32  a) { return a * a; }                        
00147 inline UInt32  Sqr (UInt32  a) { return a * a; }                        
00148 inline Float32 Sqr (Float32 a) { return a * a; }                        
00149 inline Float64 Sqr (Float64 a) { return a * a; }                        
00150 //@}
00151 
00152 
00153 /// @name Fast bit shifting
00154 //@{
00155 
00156 ///
00157 /// ShiftLeft --
00158 ///   Shift <integer> left by a signed offset: if negative, a right shift is applied.
00159 ///   If <offset> is a compile-time constant, no branch should be generated by your
00160 ///   compiler -- it should actually only output one assembly instruction.
00161 ///
00162 template <typename T>
00163 inline T ShiftLeft (T const integer, SInt8 const offset);
00164 
00165 ///
00166 /// ShiftRight --
00167 ///   Shift <integer> right by a signed offset: if negative, a left shift is applied.
00168 ///   If <offset> is a compile-time constant, no branch should be generated by your
00169 ///   compiler -- it should actually only output one assembly instruction.
00170 ///
00171 template <typename T>
00172 inline T ShiftRight  (T const integer, SInt8 const offset);
00173 
00174 
00175 ///
00176 /// ShiftLeft --
00177 ///   Fast, Select()-based bit shifting for 64-bit integers on 32-bit machines.
00178 ///
00179 inline UInt64 ShiftLeft (UInt64 const integer, UInt8 const s);
00180 
00181 ///
00182 /// ShiftRight --
00183 ///   Fast, Select()-based bit shifting for 64-bit integers on 32-bit machines.
00184 ///
00185 inline UInt64 ShiftRight (UInt64 const integer, UInt8 const s);
00186 
00187 //@}
00188 
00189 //
00190 //  Implementation follows
00191 //
00192 
00193 inline UInt8 Log2 (UInt32 a)
00194 {
00195   #if defined(__GML_MATH_USE_PPC_INTRINSICS)
00196     return UInt8 (32 - __cntlzw (a));
00197   #else
00198     if (a == 0) return 0;
00199     for (int k = 32; k; --k) {
00200       if (a == (a & (1 << (k-1)))) return k;
00201     }
00202     return 0; // prevent compiler warning
00203   #endif /* __GML_MATH_USE_PPC_INTRINSICS */
00204 }
00205 
00206 inline UInt8 Select (UInt8 a, UInt8 b, UInt8 alt1, UInt8 alt2)
00207 {
00208   return (UInt8) Select (UInt32(a), UInt32(b), UInt32(alt1), UInt32(alt2));
00209 }
00210 
00211 inline UInt16 Select (UInt16 a, UInt16 b, UInt16 alt1, UInt16 alt2)
00212 {
00213   return (UInt16) Select (UInt32(a), UInt32(b), UInt32(alt1), UInt32(alt2));
00214 }
00215 
00216 inline UInt32 Select (UInt32 a, UInt32 b, UInt32 alt1, UInt32 alt2)
00217 {
00218   #ifdef __GML_MATH_FAST_SELECT
00219     const UInt32 sign =  ((a - b)) >> 31;
00220     return (alt1 * sign) + (alt2 * (SInt32(1) - sign));
00221   #else
00222     return (a < b) ? alt1 : alt2;
00223   #endif // __GML_MATH_FAST_SELECT
00224 }
00225 
00226 inline UInt64 Select (UInt64 a, UInt64 b, UInt64 alt1, UInt64 alt2)
00227 {
00228   #ifdef __GML_MATH_FAST_SELECT
00229     const UInt64 sign =  ((UInt64) ((SInt64) a - b)) >> 63;
00230     return (alt1 * sign) + (alt2 * (SInt64(1) - sign));
00231   #else
00232     return (a < b) ? alt1 : alt2;
00233   #endif // __GML_MATH_FAST_SELECT
00234 }
00235 
00236 inline SInt8 Select (SInt8 a, SInt8 b, SInt8 alt1, SInt8 alt2)
00237 {
00238   return (a < b) ? alt1 : alt2;
00239 }
00240 
00241 inline SInt16 Select (SInt16 a, SInt16 b, SInt16 alt1, SInt16 alt2)
00242 {
00243   return (a < b) ? alt1 : alt2;
00244 }
00245 
00246 inline SInt32 Select (SInt32 a, SInt32 b, SInt32 alt1, SInt32 alt2)
00247 {
00248   return (a < b) ? alt1 : alt2;
00249 }
00250 
00251 inline SInt64 Select (SInt64 a, SInt64 b, SInt64 alt1, SInt64 alt2)
00252 {
00253   return (a < b) ? alt1 : alt2;
00254 }
00255 
00256 
00257 inline Float64 Select (Float64 a, Float64 b, Float64 alt1, Float64 alt2)
00258 {
00259   #if defined(__GML_MATH_FAST_SELECT) && defined(__GML_MATH_USE_PPC_INTRINSICS)
00260     return __fsel (b-a, alt1, alt2);
00261   #else
00262     return (a < b) ? alt1 : alt2;
00263   #endif // __GML_MATH_FAST_SELECT
00264 }
00265 
00266 inline Float32 Select (Float32 a, Float32 b, Float32 alt1, Float32 alt2)
00267 {
00268   #if defined(__GML_MATH_FAST_SELECT) && defined(__GML_MATH_USE_PPC_INTRINSICS)
00269     return __fsels (b-a, alt1, alt2);
00270   #else
00271     return (a < b) ? alt1 : alt2;
00272   #endif // __GML_MATH_FAST_SELECT
00273 }
00274 
00275 template <typename T>
00276 inline T Select (T a, T b, T alt1, T alt2)
00277 {
00278   return (a < b) ? alt1 : alt2;
00279 }
00280 
00281 template <typename T>
00282 inline T Min (T n, T p)
00283 {
00284   return Select (n, p, n, p);
00285 }
00286 
00287 template <typename T>
00288 inline T Max (T n, T p)
00289 {
00290   return Select (n, p, p, n);
00291 }
00292 
00293 template <typename T>
00294 inline T Min (T a, T b, T c)
00295 {
00296   return Min (a, Min (b, c));
00297 }
00298 
00299 template <typename T>
00300 inline T Max (T a, T b, T c)
00301 {
00302   return Max (a, Max (b, c));
00303 }
00304 
00305 
00306 template <typename T>
00307 inline T Clamp (T v, T min, T max)
00308 {
00309   return Max (min, Min (v, max));
00310 }
00311 
00312 template <typename T>
00313 inline T Clip (T v, T min, T max)
00314 {
00315   return Select (v, min, v, Select (v, max, max, v));
00316 }
00317 
00318 
00319 
00320 inline SInt32 LRound (Float32 d)
00321 {
00322   #ifdef __GML_MATH_USE_PPC_INTRINSICS
00323     Float32 ff = Floor (d);
00324     return (SInt32) __fsel (d - ff - 0.5F, ff + 1.0F, ff);
00325   #else
00326     return (SInt32) lroundf (d);
00327   #endif /* __GML_MATH_USE_PPC_INTRINSICS */
00328 }
00329 
00330 inline Float32 Divide (Float32 x, Float32 y)
00331 {
00332   #ifdef __GML_MATH_USE_PPC_INTRINSICS
00333     return x * __fres (y);
00334   #else
00335     return x/y;
00336   #endif /* __GML_MATH_USE_PPC_INTRINSICS */
00337 }
00338 
00339 inline UInt32 LFloor (Float32 d)
00340 {
00341   #ifdef __GML_MATH_USE_PPC_INTRINSICS
00342     typedef union { Float64 d; UInt64 u; } T;
00343     T res; res.d = __fctiwz (d);
00344     return (UInt32) res.u;
00345   #else
00346     return (UInt32) floorf (d);
00347   #endif /* __GML_MATH_USE_PPC_INTRINSICS */
00348 }
00349 
00350 inline UInt32 LFloor (Float64 d)
00351 {
00352   #ifdef __GML_MATH_USE_PPC_INTRINSICS
00353     typedef union { Float64 d; UInt64 u; } T;
00354     T res; res.d = __fctiwz (d);
00355     return (UInt32) res.u;
00356   #else
00357     return (UInt32) floor (d);
00358   #endif /* __GML_MATH_USE_PPC_INTRINSICS */
00359 }
00360 
00361 inline UInt32 Average (UInt32 a, UInt32 b, UInt32 c)
00362 {
00363     return (a+b+c)/3;
00364 }
00365 
00366 inline UInt16 Sqrt (UInt32 a)
00367 {
00368   #if defined(__GML_MATH_USE_PPC_INTRINSICS) && defined(__GML_MATH_FAST_SQRT)
00369     const Float64 b = (Float64) a;
00370     return (UInt16) LFloor (b * __frsqrte (b));
00371   #else
00372     UInt32 rem = 0;
00373     UInt32 root = 0;
00374 
00375     for (int i = 0; i < 16; i++) {
00376       root <<= 1;
00377       rem = ((rem << 2) + (a >> 30));
00378       a <<= 2;
00379       root++;
00380       if (root <= rem) {
00381         rem -= root;
00382         root++;
00383       } else
00384         root--;
00385     }
00386 
00387     return (UInt16) (root >> 1);
00388   #endif
00389 }
00390 
00391 inline Float32 Sqrtr (Float32 a)
00392 {
00393   #if defined(__GML_MATH_USE_PPC_INTRINSICS) && defined(__GML_MATH_FAST_SQRT)
00394     Float32 e = __frsqrtes (a);
00395     #ifdef __GML_MATH_FAST_SQRT_REFINE
00396       // two rounds of Newton-Raphson refinement
00397       e = (.5F * e * (3.0F - a * e * e));
00398       e = (.5F * e * (3.0F - a * e * e));
00399     #endif
00400     return e;
00401   #else
00402     return 1.0F / sqrtf (a);
00403   #endif
00404 }
00405 
00406 inline Float64 Sqrtr (Float64 a)
00407 {
00408   #if defined(__GML_MATH_USE_PPC_INTRINSICS) && defined(__GML_MATH_FAST_SQRT)
00409     Float64 e = __frsqrte (a);
00410     #ifdef __GML_MATH_FAST_SQRT_REFINE
00411       // two rounds of Newton-Raphson refinement
00412       e = (.5 * e * (3.0 - a * e * e));
00413       e = (.5 * e * (3.0 - a * e * e));
00414     #endif
00415     return e;
00416   #else
00417     return 1.0 / sqrt (a);
00418   #endif
00419 }
00420 
00421 inline Float32 Sqrt (Float32 a)
00422 {
00423   #if defined(__GML_MATH_USE_PPC_INTRINSICS) && defined(__GML_MATH_FAST_SQRT)
00424     return a * Sqrtr (a);
00425   #else
00426     return sqrtf (a);
00427   #endif
00428 }
00429 
00430 inline Float64 Sqrt (Float64 a)
00431 {
00432   #if defined(__GML_MATH_USE_PPC_INTRINSICS) && defined(__GML_MATH_FAST_SQRT)
00433     return a * Sqrtr (a);
00434   #else
00435     return sqrt (a);
00436   #endif
00437 }
00438 
00439 inline UInt32 Norm (UInt32 a, UInt32 b)
00440 {
00441   return Sqrt (a*a + b*b);
00442 }
00443 
00444 inline Float32 Norm (Float32 a, Float32 b)
00445 {
00446   return Sqrt (a*a + b*b);
00447 }
00448 
00449 inline UInt32 Norm (UInt32 a1, UInt32 b1, UInt32 a2, UInt32 b2)
00450 {
00451   return Norm (Abs ((SInt32) a1 - (SInt32) a2), Abs ((SInt32) b1 - (SInt32) b2));
00452 }
00453 
00454 inline Float32 Norm (Float32 a1, Float32 b1, Float32 a2, Float32 b2)
00455 {
00456   return Norm (a1 - a2, b1 - b2);
00457 }
00458 
00459 template <typename T>
00460 inline
00461 T ShiftLeft (T const integer, SInt8 const offset)
00462 {
00463   if (offset > 0) {
00464     return integer << offset;
00465   } else if (offset < 0) {
00466     return integer >> -offset;
00467   } else {
00468     return integer;
00469   }
00470 }
00471 
00472 template <typename T>
00473 inline
00474 T ShiftRight (T const integer, SInt8 const offset)
00475 {
00476   if (offset > 0) {
00477     return integer >> offset;
00478   } else if (offset < 0) {
00479     return integer << -offset;
00480   } else {
00481     return integer;
00482   }
00483 }
00484 
00485 
00486 union UInt64_union {
00487   UInt64 val;
00488   struct {
00489     #if BYTE_ORDER == BIG_ENDIAN
00490       UInt32  hi;
00491       UInt32  lo;
00492     #else
00493       UInt32  lo;
00494       UInt32  hi;
00495     #endif
00496   };
00497 };
00498 
00499 inline
00500 UInt64 ShiftLeft (UInt64 const integer, UInt8 const s)
00501 {
00502   UInt64_union ui;
00503   UInt64_union res;
00504   assert (s <= 64);
00505   ui.val = integer;
00506   res.hi = Select ((UInt32) s, 32UL,   (ui.hi << s) | (ui.lo >> (32-s)),  (ui.lo << (s-32)));
00507   res.lo = Select ((UInt32) s, 32UL,   (ui.lo << s),                      (UInt32) 0);
00508   assert (res.val == (integer << s));
00509   return res.val;
00510 }
00511 
00512 inline
00513 UInt64 ShiftRight (UInt64 const integer, UInt8 const s)
00514 {
00515   UInt64_union ui;
00516   UInt64_union res;
00517   assert (s <= 64);
00518   ui.val = integer;
00519   res.hi = Select ((UInt32) s, 32UL,   (ui.hi >> s),                      (UInt32) 0);
00520   res.lo = Select ((UInt32) s, 32UL,   (ui.hi << (32-s)) | (ui.lo >> s),  (ui.hi >> (s-32)));
00521   assert (res.val == (integer >> s));
00522   return res.val;
00523 }
00524 
00525 #endif
00526