From 85b584bcf233985fec002834918613bee6f5bca3 Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Wed, 31 Jul 2024 10:22:04 -0500 Subject: [PATCH] feat(algorithms): extrema circular buffer --- a.cc | 65 ++++ posts/algorithms/extrema-circular-buffer.html | 358 +++++++++++++++++- 2 files changed, 416 insertions(+), 7 deletions(-) create mode 100644 a.cc diff --git a/a.cc b/a.cc new file mode 100644 index 0000000..68e1f75 --- /dev/null +++ b/a.cc @@ -0,0 +1,65 @@ +#include <deque> +#include <stdexcept> +#include <utility> + +class ExtremaCircularBuffer { +public: + explicit ExtremaCircularBuffer(size_t capacity) : capacity(capacity) {} + + void push_back(double value) { + if (prices.size() == capacity) { + double front_value = prices.front(); + pop_max(front_value); + prices.pop_front(); + } + + prices.push_back(value); + push_max(value); + } + + void pop_front() { + if (prices.empty()) { + throw std::out_of_range("Cannot pop_front() from empty buffer"); + } + + double front_value = prices.front(); + pop_max(front_value); + prices.pop_front(); + } + + size_t size() const { return prices.size(); } + + double get_max() const { + if (prices.empty()) { + throw std::out_of_range("Cannot find max() of empty buffer"); + } + + return maxs.front().first; + } + +private: + void push_max(double value) { + size_t popped = 0; + + while (!maxs.empty() && maxs.back().first < value) { + popped += maxs.back().second + 1; + maxs.pop_back(); + } + + maxs.emplace_back(value, popped); + } + + void pop_max(double value) { + size_t popped = maxs.front().second; + + if (popped == 0) { + maxs.pop_front(); + } else { + --maxs.front().second; + } + } + + std::deque<double> prices; + std::deque<std::pair<double, size_t>> maxs; + size_t capacity; +}; diff --git a/posts/algorithms/extrema-circular-buffer.html b/posts/algorithms/extrema-circular-buffer.html index fc7108a..9025bd1 100644 --- a/posts/algorithms/extrema-circular-buffer.html +++ b/posts/algorithms/extrema-circular-buffer.html @@ -41,17 +41,361 @@

-

an h2

+

context

+
+

+ While working for + TRB Capital Management, certain + strategies necessitated finding the minimum and maximum of a + moving window of prices. +

+
+

problem statement

+

Design a data structure supporting the following operations:

+
    +
  • + build(size_t capacity) + : initialize the data structure with capacity/window size + capacity +
  • +
      +
    • + The data structure must always hold \(\leq\) + capacity + prices. +
    • +
    +
  • + void push_back(double value) +
  • +
      +
    • + If the data structure exceeds capacity, remove elements from the + front of the window. +
    • +
    +
  • + void pop_front() + : remove the price from the front of the window +
  • +
  • + size_t size() + : return the number of prices in the data structure +
  • +
  • + double get() + : return the extremum (min or max) +
  • +
+

solution

+

+ Try to solve it yourself first. The point of this exercise is to + create the most theoretically optimal solution you can, not + brute-force and move on. +

-

+

naïve solution

+
+
+

+ One can design a data structure meeting these requirements through + simulating the operations directly with a + std::deque<double> + (https://en.cppreference.com/w/cpp/container/deque). +

+

+ On the upside, this approach is simple to understand. Further, + operations are all \(O(1)\) time—that is, nearly all + operations. The minimum/maximum element must be found via a linear + scan in \(O(n)\) time, certainly far from optimal. +

+
#include <algorithm>
+#include <deque>
+#include <stdexcept>
+
// Fixed-capacity FIFO window of prices that can report its largest element.
//
// Naive variant: prices are stored in arrival order in a std::deque and the
// maximum is recomputed with a linear scan on every get() call.
class ExtremaCircularBuffer {
public:
  // Creates an empty window that holds at most `capacity` prices.
  explicit ExtremaCircularBuffer(size_t capacity) : capacity(capacity) {}

  // Appends `value` at the back of the window. If the window is already full,
  // the oldest price is evicted first so size() never exceeds the capacity.
  void push_back(double value) {
    // A zero-capacity window can never store a price. Without this guard the
    // eviction below would call pop_front() on an empty deque, which is
    // undefined behavior.
    if (capacity == 0) {
      return;
    }

    if (prices.size() == capacity) {
      prices.pop_front();
    }

    prices.push_back(value);
  }

  // Removes the oldest price.
  // Throws std::out_of_range if the window is empty.
  void pop_front() {
    if (prices.empty()) {
      throw std::out_of_range("Cannot pop_front() from empty buffer");
    }

    prices.pop_front();
  }

  // Returns the number of prices currently in the window.
  size_t size() const { return prices.size(); }

  // Returns the largest price in the window, found by scanning every element.
  // Throws std::out_of_range if the window is empty.
  double get() const {
    if (prices.empty()) {
      throw std::out_of_range("Cannot find max() of empty buffer");
    }

    return *std::max_element(prices.begin(), prices.end());
  }

private:
  std::deque<double> prices;  // Window contents, oldest price at the front.
  size_t capacity;            // Maximum number of prices held at once.
};
+
+
+

optimizing the approach

+
+
+

+ Rather than bear the brunt of the work finding extrema in calls to + get(), we can distribute it across the data structure as it is built. +

+

+ Maintaining the prices in a sorted order seems to suffice, and + gives access to both max and min in \(O(1)\) time. However, + not all of the problem constraints have been addressed. Adhering + to the interface of a circular buffer is another challenge. +

+

+ Fortunately, pairing each element with a count allows intelligent + removal/insertion of elements—if an element has a count of + \(0\), remove it from the list of sorted prices. A + std::map<double, size_t> + allows us to do all of this. +

+

+ Now, we can access extrema instantly. Insertion and deletion take + \(O(\log n)\) time thanks to the map—but we can do better. +

+
#include <deque>
+#include <map>
+#include <stdexcept>
+
// Fixed-capacity FIFO window of prices that can report both its largest and
// smallest element.
//
// Ordered-map variant: alongside the arrival-ordered deque, a std::map keeps
// one entry per distinct price with the number of times that price currently
// occurs in the window, so both extrema sit at the ends of the map.
class ExtremaCircularBuffer {
public:
  // Creates an empty window that holds at most `capacity` prices.
  explicit ExtremaCircularBuffer(size_t capacity) : capacity(capacity) {}

  // Appends `value` at the back of the window. If the window is already full,
  // the oldest price is evicted first so size() never exceeds the capacity.
  void push_back(double value) {
    // A zero-capacity window can never store a price. Without this guard the
    // eviction below would read front() of an empty deque, which is undefined
    // behavior.
    if (capacity == 0) {
      return;
    }

    if (prices.size() == capacity) {
      evict_front();
    }

    prices.push_back(value);
    ++sorted_prices[value];
  }

  // Removes the oldest price.
  // Throws std::out_of_range if the window is empty.
  void pop_front() {
    if (prices.empty()) {
      throw std::out_of_range("Cannot pop_front() from empty buffer");
    }

    evict_front();
  }

  // Returns the number of prices currently in the window.
  size_t size() const { return prices.size(); }

  // Returns the largest price in the window (last key of the ordered map).
  // Throws std::out_of_range if the window is empty.
  double get_max() const {
    if (prices.empty()) {
      throw std::out_of_range("Cannot find max() of empty buffer");
    }

    return sorted_prices.rbegin()->first;
  }

  // Returns the smallest price in the window (first key of the ordered map).
  // Throws std::out_of_range if the window is empty.
  double get_min() const {
    if (prices.empty()) {
      throw std::out_of_range("Cannot find min() of empty buffer");
    }

    return sorted_prices.begin()->first;
  }

private:
  // Drops the oldest price from both containers. Callers must ensure the
  // window is non-empty.
  void evict_front() {
    // Look the key up once and erase through the iterator, instead of
    // operator[] followed by a second lookup in erase(key).
    const auto entry = sorted_prices.find(prices.front());
    if (entry != sorted_prices.end() && --entry->second == 0) {
      // Last occurrence left the window; remove the key so it can no longer
      // be reported as an extremum.
      sorted_prices.erase(entry);
    }

    prices.pop_front();
  }

  std::deque<double> prices;               // Window contents, oldest first.
  std::map<double, size_t> sorted_prices;  // Price -> occurrences in window.
  size_t capacity;                         // Maximum number of prices held.
};
+
+
+

monotonic queues deques

+
+
+

+ Thinking a bit deeper about the problem constraints, it is clear + that: +

+
    +
  • + If an extremum is pushed onto the data structure, all previously + pushed elements are irrelevant to any further operations. +
  • +
+

+ Elements are processed in FIFO order, enabling this observation to + be exploited. This is the foundational idea of the + monotone priority queue + data structure. So, for maintaining a minimum/maximum, the data + structure will store a monotonically increasing/decreasing + double-ended queue. +

+

+ This solution does not satisfy a circular buffer inherently. If an + arbitrary number of elements are removed from the data structure + when an extremum is added, it is certainly not possible to maintain + a window of fixed size. +

+

Thus, we make one more observation to meet this criterion:

+
    +
  • + If each price (extremum) on the monotonic double-ended queue also + maintains a count of previously popped elements, we can + deduce the proper action to take when the data structure reaches + capacity. +
  • +
      +
    1. + If elements were previously popped before this extremum was + added to the data structure, decrement the price's count + of popped elements and do nothing. +
    2. +
    3. + Otherwise, either no elements were pushed before this extremum + or they've all been popped. Remove (pop) this element + from the deque. +
    4. +
    +
+

+ This approach supports all operations in amortized \(O(1)\) time + (with a monotonic sequence, each element is added and removed at most + once; across a sequence of \(n\) operations, \(O(n)\) total \(O(1)\) + operations will be executed). +

+
#include <deque>
+#include <stdexcept>
+#include <utility>
+
// Fixed-capacity FIFO window of prices that can report its largest element.
//
// Monotonic-deque variant: `maxs` holds the prices that can still become the
// window maximum, in non-increasing order from front to back. Each entry also
// records how many older prices it displaced when it was pushed, which lets
// the class tell whether the price leaving the window is the entry itself or
// one of the prices that entry displaced.
class ExtremaCircularBuffer {
public:
  // Creates an empty window that holds at most `capacity` prices.
  explicit ExtremaCircularBuffer(size_t capacity) : capacity(capacity) {}

  // Appends `value` at the back of the window. If the window is already full,
  // the oldest price is evicted first so size() never exceeds the capacity.
  void push_back(double value) {
    // A zero-capacity window can never store a price. Without this guard the
    // eviction below would read front() of empty deques, which is undefined
    // behavior.
    if (capacity == 0) {
      return;
    }

    if (prices.size() == capacity) {
      evict_front();
    }

    prices.push_back(value);
    push_max(value);
  }

  // Removes the oldest price.
  // Throws std::out_of_range if the window is empty.
  void pop_front() {
    if (prices.empty()) {
      throw std::out_of_range("Cannot pop_front() from empty buffer");
    }

    evict_front();
  }

  // Returns the number of prices currently in the window.
  size_t size() const { return prices.size(); }

  // Returns the largest price in the window (front of the monotonic deque).
  // Throws std::out_of_range if the window is empty.
  double get_max() const {
    if (prices.empty()) {
      throw std::out_of_range("Cannot find max() of empty buffer");
    }

    return maxs.front().first;
  }

private:
  // Drops the oldest price from the window and updates the monotonic deque.
  // Callers must ensure the window is non-empty.
  void evict_front() {
    pop_max();
    prices.pop_front();
  }

  // Registers a newly pushed price in the monotonic deque.
  void push_max(double value) {
    size_t absorbed = 0;

    // Every strictly smaller entry at the back can never be the maximum again
    // while `value` is in the window. Remove it, but remember it (+1) and
    // everything it had displaced itself (.second) so evictions stay in step
    // with `prices`. Equal prices are kept as separate entries.
    while (!maxs.empty() && maxs.back().first < value) {
      absorbed += maxs.back().second + 1;
      maxs.pop_back();
    }

    maxs.emplace_back(value, absorbed);
  }

  // Accounts for the oldest price leaving the window.
  void pop_max() {
    if (maxs.front().second == 0) {
      // No displaced prices remain ahead of the front entry, so the price
      // leaving the window is the front entry itself.
      maxs.pop_front();
    } else {
      // The price leaving the window is one that the front entry displaced.
      --maxs.front().second;
    }
  }

  std::deque<double> prices;                   // Window contents, oldest first.
  std::deque<std::pair<double, size_t>> maxs;  // (price, displaced count).
  size_t capacity;                             // Maximum number of prices held.
};
+

further improvements

+
    +
  1. + While the final approach is theoretically faster than the + second, with small data sets the overhead of the latter is + likely to offset any performance gains. +
  2. +
  3. + The class could leverage templates to take in a comparator + (e.g. std::less<double> + ) to easily specify a minimum/maximum + ExtremaCircularBuffer + as well as a value type to support all operations. +
  4. +
  5. + As it stands, the class also only maintains one of either + extrema, and using two monotonic deques, while still + theoretically optimal, doesn't give me a good + feeling. The second map-based approach might be favorable here. +
  6. +
-
content