// Copyright 2019 The MediaPipe Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // This class is intended to contain a collection of useful (static) // mathematical functions, properly coded (by consulting numerical // recipes or another authoritative source first). #ifndef MEDIAPIPE_DEPS_MATHUTIL_H_ #define MEDIAPIPE_DEPS_MATHUTIL_H_ #include #include #include #include "mediapipe/framework/port/integral_types.h" #include "mediapipe/framework/port/logging.h" namespace mediapipe { // ========================================================================= // class MathUtil { public: // -------------------------------------------------------------------- // Round // This function rounds a floating-point number to an integer. It // works for positive or negative numbers. // // Values that are halfway between two integers may be rounded up or // down, for example Round(0.5) == 0 and Round(1.5) == 2. // This allows the function to be implemented efficiently on multiple // hardware platforms (see the template specializations at the bottom // of this file). You should not use this function if you care about which // way such half-integers are rounded. // // Example usage: // double y, z; // int x = Round(y + 3.7); // int64 b = Round(0.3 * z); // // Note that the floating-point template parameter is typically inferred // from the argument type, i.e. there is no need to specify it explicitly. // -------------------------------------------------------------------- template static IntOut Round(FloatIn x) { static_assert(!std::numeric_limits::is_integer, "FloatIn is integer"); static_assert(std::numeric_limits::is_integer, "IntOut is not integer"); // We don't use sgn(x) below because there is no need to distinguish the // (x == 0) case. Also note that there are specialized faster versions // of this function for Intel, ARM and PPC processors at the bottom // of this file. if (x > -0.5 && x < 0.5) { // This case is special, because for largest floating point number // below 0.5, the addition of 0.5 yields 1 and this would lead // to incorrect result. return static_cast(0); } return static_cast(x < 0 ? (x - 0.5) : (x + 0.5)); } // Convert a floating-point number to an integer. For all inputs x where // static_cast(x) is legal according to the C++ standard, the result // is identical to that cast (i.e. the result is x with its fractional part // truncated whenever that is representable as IntOut). // // static_cast would cause undefined behavior for the following cases, which // have well-defined behavior for this function: // // 1. If x is NaN, the result is zero. // // 2. If the truncated form of x is above the representable range of IntOut, // the result is std::numeric_limits::max(). // // 3. If the truncated form of x is below the representable range of IntOut, // the result is std::numeric_limits::min(). // // Note that cases #2 and #3 cover infinities as well as finite numbers. // // The range of FloatIn must include the range of IntOut, otherwise // the results are undefined. template static IntOut SafeCast(FloatIn x) { static_assert(!std::numeric_limits::is_integer, "FloatIn is integer"); static_assert(std::numeric_limits::is_integer, "IntOut is not integer"); static_assert(std::numeric_limits::radix == 2, "IntOut is base 2"); // Special case NaN, for which the logic below doesn't work. if (std::isnan(x)) { return 0; } // Negative values all clip to zero for unsigned results. if (!std::numeric_limits::is_signed && x < 0) { return 0; } // Handle infinities. if (std::isinf(x)) { return x < 0 ? std::numeric_limits::min() : std::numeric_limits::max(); } // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0), // unless x is zero in which case exp == 0. Note that this implies that the // magnitude of x is strictly less than 2^exp. int exp = 0; std::frexp(x, &exp); // Let N be the number of non-sign bits in the representation of IntOut. If // the magnitude of x is strictly less than 2^N, the truncated version of x // is representable as IntOut. The only representable integer for which this // is not the case is std::numeric_limits::min() for signed types (i.e. // -2^N), but that is covered by the fall-through below. if (exp <= std::numeric_limits::digits) { return x; } // Handle numbers with magnitude >= 2^N. return x < 0 ? std::numeric_limits::min() : std::numeric_limits::max(); } // -------------------------------------------------------------------- // SafeRound // These functions round a floating-point number to an integer. // Results are identical to Round, except in cases where // the argument is NaN, or when the rounded value would overflow the // return type. In those cases, Round has undefined // behavior. SafeRound returns 0 when the argument is // NaN, and returns the closest possible integer value otherwise (i.e. // std::numeric_limits::max() for large positive values, and // std::numeric_limits::min() for large negative values). // The range of FloatIn must include the range of IntOut, otherwise // the results are undefined. // -------------------------------------------------------------------- template static IntOut SafeRound(FloatIn x) { static_assert(!std::numeric_limits::is_integer, "FloatIn is integer"); static_assert(std::numeric_limits::is_integer, "IntOut is not integer"); if (std::isnan(x)) { return 0; } else { return SafeCast((x < 0.) ? (x - 0.5) : (x + 0.5)); } } // -------------------------------------------------------------------- // FastIntRound, FastInt64Round // Fast routines for converting floating-point numbers to integers. // // These routines are approximately 6 times faster than the default // implementation of Round on Intel processors (12 times faster on // the Pentium 3). They are also more than 5 times faster than simply // casting a "double" to an "int" using static_cast. This is // because casts are defined to truncate towards zero, which on Intel // processors requires changing the rounding mode and flushing the // floating-point pipeline (unless programs are compiled specifically // for the Pentium 4, which has a new instruction to avoid this). // // Numbers that are halfway between two integers may be rounded up or // down. This is because the conversion is done using the default // rounding mode, which rounds towards the closest even number in case // of ties. So for example, FastIntRound(0.5) == 0, but // FastIntRound(1.5) == 2. These functions should only be used with // applications that don't care about which way such half-integers are // rounded. // // There are template specializations of Round() which call these // functions (for "int" and "int64" only), but it's safer to call them // directly. // -------------------------------------------------------------------- static int32 FastIntRound(double x) { #if defined __GNUC__ && (defined __i386__ || defined __SSE2__ || \ defined __aarch64__ || defined __powerpc64__) #if defined __AVX__ // AVX. int32 result; __asm__ __volatile__( "vcvtsd2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __SSE2__ // SSE2. int32 result; __asm__ __volatile__( "cvtsd2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __i386__ // FPU stack. Adapted from /usr/include/bits/mathinline.h. int32 result; __asm__ __volatile__("fistpl %0" : "=m"(result) // Output operand is a memory location : "t"(x) // Input operand is top of FP stack : "st"); // Clobbers (pops) top of FP stack return result; #elif defined __aarch64__ int64 result; __asm__ __volatile__("fcvtns %d0, %d1" : "=w"(result) // Vector floating point register : "w"(x) // Vector floating point register : /* No clobbers */); return static_cast(result); #elif defined __powerpc64__ int64 result; __asm__ __volatile__("fctid %0, %1" : "=d"(result) : "d"(x) : /* No clobbers */); return result; #endif // defined __powerpc64__ #else return Round(x); #endif // if defined __GNUC__ && ... } static int32 FastIntRound(float x) { #if defined __GNUC__ && (defined __i386__ || defined __SSE2__ || \ defined __aarch64__ || defined __powerpc64__) #if defined __AVX__ // AVX. int32 result; __asm__ __volatile__( "vcvtss2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __SSE2__ // SSE2. int32 result; __asm__ __volatile__( "cvtss2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __i386__ // FPU stack. Adapted from /usr/include/bits/mathinline.h. int32 result; __asm__ __volatile__("fistpl %0" : "=m"(result) // Output operand is a memory location : "t"(x) // Input operand is top of FP stack : "st"); // Clobbers (pops) top of FP stack return result; #elif defined __aarch64__ int64 result; __asm__ __volatile__("fcvtns %s0, %s1" : "=w"(result) // Vector floating point register : "w"(x) // Vector floating point register : /* No clobbers */); return static_cast(result); #elif defined __powerpc64__ uint64 output; __asm__ __volatile__("fctiw %0, %1" : "=d"(output) : "f"(x) : /* No clobbers */); return bit_cast(static_cast(output >> 32)); #endif // defined __powerpc64__ #else return Round(x); #endif // if defined __GNUC__ && ... } static int64 FastInt64Round(double x) { #if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || \ defined __aarch64__ || defined __powerpc64__) #if defined __AVX__ // AVX. int64 result; __asm__ __volatile__( "vcvtsd2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __x86_64__ // SSE2. int64 result; __asm__ __volatile__( "cvtsd2si %1, %0" : "=r"(result) // Output operand is a register : "xm"(x)); // Input operand is an xmm register or memory return result; #elif defined __i386__ // There is no CVTSD2SI in i386 to produce a 64 bit int, even with SSE2. // FPU stack. Adapted from /usr/include/bits/mathinline.h. int64 result; __asm__ __volatile__("fistpll %0" : "=m"(result) // Output operand is a memory location : "t"(x) // Input operand is top of FP stack : "st"); // Clobbers (pops) top of FP stack return result; #elif defined __aarch64__ // Floating-point convert to signed integer, // rounding to nearest with ties to even. int64 result; __asm__ __volatile__("fcvtns %d0, %d1" : "=w"(result) : "w"(x) : /* No clobbers */); return result; #elif defined __powerpc64__ int64 result; __asm__ __volatile__("fctid %0, %1" : "=d"(result) : "d"(x) : /* No clobbers */); return result; #endif // if defined __powerpc64__ #else return Round(x); #endif // if defined __GNUC__ && ... } static int64 FastInt64Round(float x) { return FastInt64Round(static_cast(x)); } static int32 FastIntRound(long double x) { return Round(x); } static int64 FastInt64Round(long double x) { return Round(x); } // Absolute value of the difference between two numbers. // Works correctly for signed types and special floating point values. template static typename std::make_unsigned::type AbsDiff(const T x, const T y) { // Carries out arithmetic as unsigned to avoid overflow. typedef typename std::make_unsigned::type R; return x > y ? R(x) - R(y) : R(y) - R(x); } // Clamps value to the range [low, high]. Requires low <= high. template // T models LessThanComparable. static const T& Clamp(const T& low, const T& high, const T& value) { // Prevents errors in ordering the arguments. DCHECK(!(high < low)); if (high < value) return high; if (value < low) return low; return value; } // If two (usually floating point) numbers are within a certain // absolute margin of error. template static bool WithinMargin(const T x, const T y, const T margin) { DCHECK_GE(margin, 0); return (std::abs(x) <= std::abs(y) + margin) && (std::abs(x) >= std::abs(y) - margin); } }; // ========================================================================= // #if defined __GNUC__ && (defined __i386__ || defined __x86_64__ || \ defined __aarch64__ || defined __powerpc64__) // We define template specializations of Round() to get the more efficient // Intel versions when possible. Note that gcc does not currently support // partial specialization of templatized functions. template <> inline int32 MathUtil::Round(float x) { return FastIntRound(x); } template <> inline int32 MathUtil::Round(double x) { return FastIntRound(x); } template <> inline int64 MathUtil::Round(float x) { return FastInt64Round(x); } template <> inline int64 MathUtil::Round(double x) { return FastInt64Round(x); } #endif } // namespace mediapipe #endif // MEDIAPIPE_DEPS_MATHUTIL_H_