[7.6][ML] Assorted runtime optimisations for classification and regression (elastic#873)

Backport elastic#863.
przemekwitek · Dec 4, 2019 · dcf7370 · dcf7370
1 parent bc51132
commit dcf7370
Show file tree

Hide file tree

Showing 16 changed files with 369 additions and 81 deletions.
diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
@@ -39,10 +39,12 @@ estimating maximum memory usage. (See {ml-pull}781[#781].)
 * Stratified fractional cross validation for regression. (See {ml-pull}784[#784].)
 * Added `geo_point` supported output for `lat_long` function records. (See {ml-pull}809[#809]
 and {pull}47050[#47050].)
-* Reduce memory usage of {ml} native processes on Windows. (See {ml-pull}844[#844].)
 * Use a random bag of the data to compute the loss function derivatives for each new
 tree which is trained for both regression and classification. (See {ml-pull}811[#811].)
-* Emit `prediction_probability` field alongside prediction field in ml results. (See {ml-pull}818[#818].)
+* Emit `prediction_probability` field alongside prediction field in ml results.
+(See {ml-pull}818[#818].)
+* Reduce memory usage of {ml} native processes on Windows. (See {ml-pull}844[#844].)
+* Reduce runtime of classification and regression. (See {ml-pull}863[#863].)
 
 === Bug Fixes
 * Fixes potential memory corruption when determining seasonality. (See {ml-pull}852[#852].)

diff --git a/include/core/CDataFrame.h b/include/core/CDataFrame.h
@@ -8,6 +8,7 @@
 #define INCLUDED_ml_core_CDataFrame_h
 
 #include <core/CFloatStorage.h>
+#include <core/CPackedBitVector.h>
 #include <core/CVectorRange.h>
 #include <core/Concurrency.h>
 #include <core/ImportExport.h>
@@ -26,7 +27,6 @@ namespace ml {
 namespace core {
 class CDataFrameRowSlice;
 class CDataFrameRowSliceHandle;
-class CPackedBitVector;
 class CTemporaryDirectory;
 
 namespace data_frame_detail {
@@ -35,7 +35,29 @@ using TFloatVec = std::vector<CFloatStorage>;
 using TFloatVecItr = TFloatVec::iterator;
 using TInt32Vec = std::vector<std::int32_t>;
 using TInt32VecCItr = TInt32Vec::const_iterator;
-using TPopMaskedRowFunc = std::function<std::size_t()>;
+
+//! \brief Callable which steps to the next masked row of a slice.
+//!
+//! DESCRIPTION:\n
+//! Each call advances the supplied masked row iterator by one. If it then
+//! equals the end of the masked rows the end of the slice's rows is returned,
+//! otherwise the row index it refers to is returned capped at the end of the
+//! slice's rows.
+//!
+//! Both iterators are held by pointer rather than copied, so they must outlive
+//! this object and the caller's masked row iterator is advanced by every call.
+class CORE_EXPORT CPopMaskedRow {
+public:
+    CPopMaskedRow(std::size_t endSliceRows,
+                  CPackedBitVector::COneBitIndexConstIterator& maskedRow,
+                  const CPackedBitVector::COneBitIndexConstIterator& endMaskedRows)
+        : m_EndSliceRows{endSliceRows}, m_MaskedRow{&maskedRow}, m_EndMaskedRows{&endMaskedRows} {
+    }
+
+    //! Advance to the next masked row and get its index, or the end of the
+    //! slice's rows if there are no more masked rows before it.
+    std::size_t operator()() const {
+        auto& current = *m_MaskedRow;
+        ++current;
+        if (current == *m_EndMaskedRows) {
+            return m_EndSliceRows;
+        }
+        return std::min(*current, m_EndSliceRows);
+    }
+
+private:
+    //! The index one past the last row of the slice.
+    std::size_t m_EndSliceRows;
+    //! The caller's iterator over the masked rows (advanced in place).
+    CPackedBitVector::COneBitIndexConstIterator* m_MaskedRow;
+    //! The caller's end iterator for the masked rows.
+    const CPackedBitVector::COneBitIndexConstIterator* m_EndMaskedRows;
+};
+
+using TOptionalPopMaskedRow = boost::optional<CPopMaskedRow>;
 
 //! \brief A lightweight wrapper around a single row of the data frame.
 //!
@@ -123,7 +145,7 @@ class CORE_EXPORT CRowIterator final
                  std::size_t index,
                  TFloatVecItr rowItr,
                  TInt32VecCItr docHashItr,
-                 TPopMaskedRowFunc popMaskedRow = nullptr);
+                 const TOptionalPopMaskedRow& popMaskedRow);
 
     //! \name Forward Iterator Contract
     //@{
@@ -141,7 +163,7 @@ class CORE_EXPORT CRowIterator final
     std::size_t m_Index = 0;
     TFloatVecItr m_RowItr;
     TInt32VecCItr m_DocHashItr;
-    TPopMaskedRowFunc m_PopMaskedRow;
+    TOptionalPopMaskedRow m_PopMaskedRow;
 };
 }
 
@@ -469,7 +491,7 @@ class CORE_EXPORT CDataFrame final {
     using TStrSizeUMapVec = std::vector<TStrSizeUMap>;
     using TSizeSizePr = std::pair<std::size_t, std::size_t>;
     using TSizeDataFrameRowSlicePtrVecPr = std::pair<std::size_t, TRowSlicePtrVec>;
-    using TPopMaskedRowFunc = data_frame_detail::TPopMaskedRowFunc;
+    using TOptionalPopMaskedRow = data_frame_detail::TOptionalPopMaskedRow;
 
     //! \brief Writes rows to the data frame.
     class CDataFrameRowSliceWriter final {
@@ -504,19 +526,19 @@ class CORE_EXPORT CDataFrame final {
     TRowFuncVecBoolPr parallelApplyToAllRows(std::size_t numberThreads,
                                              std::size_t beginRows,
                                              std::size_t endRows,
-                                             TRowFunc func,
+                                             TRowFunc&& func,
                                              const CPackedBitVector* rowMask,
                                              bool commitResult) const;
     TRowFuncVecBoolPr sequentialApplyToAllRows(std::size_t beginRows,
                                                std::size_t endRows,
-                                               TRowFunc func,
+                                               TRowFunc& func,
                                                const CPackedBitVector* rowMask,
                                                bool commitResult) const;
 
     void applyToRowsOfOneSlice(TRowFunc& func,
                                std::size_t firstRowToRead,
                                std::size_t endRowsToRead,
-                               TPopMaskedRowFunc popMaskedRow,
+                               const TOptionalPopMaskedRow& popMaskedRow,
                                const CDataFrameRowSliceHandle& slice) const;
 
     TRowSlicePtrVecCItr beginSlices(std::size_t beginRows) const;

diff --git a/include/core/CFloatStorage.h b/include/core/CFloatStorage.h
@@ -83,6 +83,46 @@ class CORE_EXPORT CFloatStorage {
     //! Implicit construction from a double.
     CFloatStorage(double value) : m_Value() { this->set(value); }
 
+    //! \name Operators
+    //!
+    //! Comparisons with another CFloatStorage compare the stored values
+    //! directly. Comparisons with a double first convert the stored value
+    //! to double, so they are made on doubles rather than by converting
+    //! \p rhs to the storage type.
+    //@{
+    //! Check if the stored values are equal.
+    bool operator==(const CFloatStorage& rhs) const {
+        return m_Value == rhs.m_Value;
+    }
+    //! Check if the stored value converted to double equals \p rhs.
+    bool operator==(const double& rhs) const {
+        return static_cast<double>(m_Value) == rhs;
+    }
+    //! Check if the stored values differ.
+    bool operator!=(const CFloatStorage& rhs) const {
+        return m_Value != rhs.m_Value;
+    }
+    //! Check if the stored value converted to double differs from \p rhs.
+    bool operator!=(const double& rhs) const {
+        return static_cast<double>(m_Value) != rhs;
+    }
+    //! Check if the stored value is less than that of \p rhs.
+    bool operator<(const CFloatStorage& rhs) const {
+        return m_Value < rhs.m_Value;
+    }
+    //! Check if the stored value converted to double is less than \p rhs.
+    bool operator<(const double& rhs) const {
+        return static_cast<double>(m_Value) < rhs;
+    }
+    //! Check if the stored value is less than or equal to that of \p rhs.
+    bool operator<=(const CFloatStorage& rhs) const {
+        return m_Value <= rhs.m_Value;
+    }
+    //! Check if the stored value converted to double is less than or equal
+    //! to \p rhs.
+    bool operator<=(const double& rhs) const {
+        return static_cast<double>(m_Value) <= rhs;
+    }
+    //! Check if the stored value is greater than that of \p rhs.
+    bool operator>(const CFloatStorage& rhs) const {
+        return m_Value > rhs.m_Value;
+    }
+    //! Check if the stored value converted to double is greater than \p rhs.
+    bool operator>(const double& rhs) const {
+        return static_cast<double>(m_Value) > rhs;
+    }
+    //! Check if the stored value is greater than or equal to that of \p rhs.
+    bool operator>=(const CFloatStorage& rhs) const {
+        return m_Value >= rhs.m_Value;
+    }
+    //! Check if the stored value converted to double is greater than or
+    //! equal to \p rhs.
+    bool operator>=(const double& rhs) const {
+        return static_cast<double>(m_Value) >= rhs;
+    }
+    //@}
+
     //! Set from a string.
     bool fromString(const std::string& string) {
         double value;

diff --git a/include/core/CImmutableRadixSet.h b/include/core/CImmutableRadixSet.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+#ifndef INCLUDED_ml_core_CImmutableRadixSet_h
+#define INCLUDED_ml_core_CImmutableRadixSet_h
+
+#include <core/CContainerPrinter.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <initializer_list>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace ml {
+namespace core {
+
+//! \brief An immutable sorted set which provides very fast lookup.
+//!
+//! DESCRIPTION:\n
+//! This supports upper bound and look up by index as well as a subset of the
+//! non-modifying interface of std::set. Its main purpose is to provide much
+//! faster lookup. To this end it subdivides the range of the sorted values into
+//! equal width buckets and only searches the bucket containing the value being
+//! looked up. In the case that the values are uniformly distributed upperBound
+//! will be O(1) with low constant. Otherwise, it is worst case O(log(n)).
+template<typename T>
+class CImmutableRadixSet {
+public:
+    using TVec = std::vector<T>;
+    using TCItr = typename std::vector<T>::const_iterator;
+
+public:
+    // We only need to support floating point types at present (although it
+    // could easily be extended to support any numeric type).
+    static_assert(std::is_floating_point<T>::value, "Only supports floating point types");
+
+public:
+    CImmutableRadixSet() = default;
+    //! Create from \p values, which are sorted and deduplicated on construction.
+    explicit CImmutableRadixSet(std::initializer_list<T> values)
+        : m_Values{std::move(values)} {
+        this->initialize();
+    }
+    //! Create from \p values, which are sorted and deduplicated on construction.
+    explicit CImmutableRadixSet(TVec values) : m_Values{std::move(values)} {
+        this->initialize();
+    }
+
+    // This is movable only because we hold iterators to the underlying container:
+    // a memberwise copy would leave the copy's buckets referring to the values of
+    // the object it was copied from.
+    CImmutableRadixSet(const CImmutableRadixSet&) = delete;
+    CImmutableRadixSet& operator=(const CImmutableRadixSet&) = delete;
+    CImmutableRadixSet(CImmutableRadixSet&&) = default;
+    CImmutableRadixSet& operator=(CImmutableRadixSet&&) = default;
+
+    //! \name Capacity
+    //@{
+    //! Check if the set contains no values.
+    bool empty() const { return m_Values.empty(); }
+    //! Get the number of distinct values in the set.
+    std::size_t size() const { return m_Values.size(); }
+    //@}
+
+    //! \name Iterators
+    //@{
+    //! Get an iterator to the smallest value.
+    TCItr begin() const { return m_Values.begin(); }
+    //! Get an iterator to one past the largest value.
+    TCItr end() const { return m_Values.end(); }
+    //@}
+
+    //! \name Lookup
+    //@{
+    //! Get the i'th smallest value. \p i is not range checked.
+    const T& operator[](std::size_t i) const { return m_Values[i]; }
+    //! Get the index of the first value in the set which is greater than
+    //! \p value or size() if there is no such value.
+    std::ptrdiff_t upperBound(const T& value) const {
+        // This branch is predictable so essentially free.
+        if (m_Values.size() < 2) {
+            return std::distance(m_Values.begin(),
+                                 std::upper_bound(m_Values.begin(), m_Values.end(), value));
+        }
+
+        // The index of the bucket which contains value.
+        std::ptrdiff_t bucket{static_cast<std::ptrdiff_t>(m_Scale * (value - m_Min))};
+        if (bucket < 0) {
+            return 0;
+        }
+        if (bucket >= static_cast<std::ptrdiff_t>(m_Buckets.size())) {
+            return static_cast<std::ptrdiff_t>(m_Values.size());
+        }
+        // Only the values which were assigned to this bucket need searching.
+        TCItr beginBucket;
+        TCItr endBucket;
+        std::tie(beginBucket, endBucket) = m_Buckets[bucket];
+        return std::distance(m_Values.begin(),
+                             std::upper_bound(beginBucket, endBucket, value));
+    }
+    //@}
+
+    //! Get a string representation of the sorted values.
+    std::string print() const {
+        return core::CContainerPrinter::print(m_Values);
+    }
+
+private:
+    using TCItrCItrPr = std::pair<TCItr, TCItr>;
+    using TCItrCItrPrVec = std::vector<TCItrCItrPr>;
+    using TPtrdiffVec = std::vector<std::ptrdiff_t>;
+
+private:
+    //! Sort and deduplicate the values then, if there are at least two, split
+    //! their range into equal width buckets recording for each one the range
+    //! of values which upperBound has to search.
+    void initialize() {
+        std::sort(m_Values.begin(), m_Values.end());
+        m_Values.erase(std::unique(m_Values.begin(), m_Values.end()), m_Values.end());
+        if (m_Values.size() > 1) {
+            // Use as many buckets as distinct values. m_Scale is the reciprocal
+            // of the bucket width.
+            std::size_t numberBuckets{m_Values.size()};
+            m_Min = m_Values[0];
+            m_Scale = static_cast<T>(numberBuckets) / (m_Values.back() - m_Min);
+            m_Buckets.reserve(numberBuckets);
+            // The one-based number of the bucket being filled and its right
+            // end point.
+            T bucket{1};
+            T bucketClose{m_Min + bucket / m_Scale};
+            auto start = m_Values.begin();
+            for (auto i = m_Values.begin(); i != m_Values.end(); ++i) {
+                if (*i > bucketClose) {
+                    // *i is the first value past the end of the current
+                    // bucket so close it and open the next one at *i.
+                    m_Buckets.emplace_back(start, i);
+                    bucket += T{1};
+                    bucketClose = m_Min + bucket / m_Scale;
+                    start = i;
+                    // Every bucket which ends before *i contains no values.
+                    // These get the one element range [i, i + 1) so that
+                    // searching them yields the position of *i.
+                    while (*i > bucketClose) {
+                        m_Buckets.emplace_back(start, i + 1);
+                        bucket += T{1};
+                        bucketClose = m_Min + bucket / m_Scale;
+                    }
+                }
+            }
+            // The last bucket is still open when we run out of values.
+            if (m_Buckets.size() < numberBuckets) {
+                m_Buckets.emplace_back(start, m_Values.end());
+            }
+        }
+    }
+
+private:
+    //! The smallest value in the set.
+    T m_Min = T{0};
+    //! The number of buckets per unit of value.
+    T m_Scale = T{0};
+    //! The range of m_Values to search for each bucket.
+    TCItrCItrPrVec m_Buckets;
+    //! The sorted distinct values.
+    TVec m_Values;
+};
+}
+}
+
+#endif // INCLUDED_ml_core_CImmutableRadixSet_h
diff --git a/include/maths/CBoostedTreeHyperparameters.h b/include/maths/CBoostedTreeHyperparameters.h
@@ -108,7 +108,7 @@ class CBoostedTreeRegularization final {
                                      m_SoftTreeDepthLimit, inserter);
         core::CPersistUtils::persist(REGULARIZATION_SOFT_TREE_DEPTH_TOLERANCE_TAG,
                                      m_SoftTreeDepthTolerance, inserter);
-    };
+    }
 
     //! Populate the object from serialized data.
     bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) {
@@ -131,7 +131,7 @@ class CBoostedTreeRegularization final {
                                                  m_SoftTreeDepthTolerance, traverser))
         } while (traverser.next());
         return true;
-    };
+    }
 
 public:
     static const std::string REGULARIZATION_DEPTH_PENALTY_MULTIPLIER_TAG;