Commit
Multi-day backtesting now supported.
mhallsmoore committed Jun 3, 2015
1 parent 17b36c5 commit 458f263
Showing 4 changed files with 113 additions and 36 deletions.
9 changes: 4 additions & 5 deletions backtest/backtest.py
@@ -18,7 +18,7 @@

def backtest(
    events, ticker, strategy, portfolio,
-    execution, heartbeat, max_iters=200000
+    execution, heartbeat, max_iters=5000000
):
    """
    Carries out an infinite while loop that polls the
@@ -29,12 +29,11 @@ def backtest(
    exceeded.
    """
    iters = 0
-    while True and iters < max_iters:
-        ticker.stream_next_tick()
+    while iters < max_iters and ticker.continue_backtest:
        try:
            event = events.get(False)
        except queue.Empty:
-            pass
+            ticker.stream_next_tick()
        else:
            if event is not None:
                if event.type == 'TICK':
@@ -54,7 +53,7 @@
    events = queue.Queue()
    equity = settings.EQUITY

-    # Load the historic CSV tick data files
+    # Load the historic CSV tick data filesw
    pairs = ["GBPUSD"]
    csv_dir = settings.CSV_DATA_DIR
    if csv_dir is None:
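The revised loop above only pulls a new tick from the price handler once the event queue has been drained, and it stops as soon as the ticker's continue_backtest flag goes False. Below is a minimal, self-contained sketch of that polling pattern; it is not part of this commit, and ToyTicker and ToyTickEvent are hypothetical stand-ins for the qsforex price handler and TickEvent, so only the control flow is illustrated.

# Minimal sketch of the event-queue polling loop shown above.
# ToyTicker and ToyTickEvent are hypothetical stand-ins; the real
# qsforex components dispatch ticks to strategy/portfolio/execution.
try:
    import Queue as queue   # Python 2
except ImportError:
    import queue            # Python 3


class ToyTickEvent(object):
    type = 'TICK'


class ToyTicker(object):
    """Streams a fixed number of fake ticks, then signals completion."""
    def __init__(self, events_queue, total_ticks=5):
        self.events_queue = events_queue
        self.continue_backtest = True
        self._remaining = total_ticks

    def stream_next_tick(self):
        if self._remaining == 0:
            self.continue_backtest = False
            return
        self._remaining -= 1
        self.events_queue.put(ToyTickEvent())


def run_loop(max_iters=100):
    events = queue.Queue()
    ticker = ToyTicker(events)
    iters = 0
    processed = 0
    while iters < max_iters and ticker.continue_backtest:
        try:
            event = events.get(False)
        except queue.Empty:
            # Queue drained: ask the ticker for the next tick.
            ticker.stream_next_tick()
        else:
            if event is not None and event.type == 'TICK':
                processed += 1   # a strategy would calculate signals here
        iters += 1
    print("Processed %d ticks in %d iterations" % (processed, iters))


if __name__ == "__main__":
    run_loop()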
99 changes: 71 additions & 28 deletions data/price.py
@@ -1,12 +1,16 @@
from __future__ import print_function

import datetime
from decimal import Decimal, getcontext, ROUND_HALF_DOWN
import os
import os.path
import re
import time

import numpy as np
import pandas as pd

from qsforex import settings
from qsforex.event.event import TickEvent


@@ -96,9 +100,32 @@ def __init__(self, pairs, events_queue, csv_dir):
        self.csv_dir = csv_dir
        self.prices = self._set_up_prices_dict()
        self.pair_frames = {}
-        self._open_convert_csv_files()
+        self.file_dates = self._list_all_file_dates()
+        self.continue_backtest = True
+        self.cur_date_idx = 0
+        self.cur_date_pairs = self._open_convert_csv_files_for_day(
+            self.file_dates[self.cur_date_idx]
+        )

+    def _list_all_csv_files(self):

femtotrader commented (Jun 19, 2016): Just a tip: glob is a nice module https://docs.python.org/3/library/glob.html
glob.glob(os.path.join(settings.CSV_DATA_DIR, "*.csv"))
+        files = os.listdir(settings.CSV_DATA_DIR)
+        pattern = re.compile("[A-Z]{6}_\d{8}.csv")
+        matching_files = [f for f in files if pattern.search(f)]
+        matching_files.sort()
+        return matching_files
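As an aside on the review comment above: a glob-based listing can replace os.listdir plus manual filtering. The short sketch below is not part of this commit; CSV_DATA_DIR and list_pair_csv_files are made-up names, and a regex check is kept because glob's "*.csv" pattern is looser than "[A-Z]{6}_\d{8}.csv".

# Sketch of the glob-based listing suggested in the comment above.
# CSV_DATA_DIR and list_pair_csv_files are hypothetical names.
import glob
import os
import re

CSV_DATA_DIR = "./data"  # assumed location of the tick CSV files


def list_pair_csv_files(csv_dir):
    pattern = re.compile(r"[A-Z]{6}_\d{8}\.csv$")
    candidates = glob.glob(os.path.join(csv_dir, "*.csv"))
    names = [os.path.basename(f) for f in candidates]
    return sorted(n for n in names if pattern.search(n))


if __name__ == "__main__":
    print(list_pair_csv_files(CSV_DATA_DIR))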

-    def _open_convert_csv_files(self):
+    def _list_all_file_dates(self):
+        """
+        Removes the pair, underscore and '.csv' from the
+        dates and eliminates duplicates. Returns a list
+        of date strings of the form "YYYYMMDD".
+        """
+        csv_files = self._list_all_csv_files()
+        de_dup_csv = list(set([d[7:-4] for d in csv_files]))
+        de_dup_csv.sort()
+        return de_dup_csv
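For a filename such as "GBPUSD_20140101.csv", the slice d[7:-4] drops the six-letter pair plus underscore at the front and the ".csv" extension at the back, leaving just the date; the set removes duplicates when several pairs trade on the same day. A quick illustration with made-up filenames, not part of this commit:

# Illustration of the d[7:-4] date extraction with made-up filenames.
csv_files = ["GBPUSD_20140101.csv", "EURUSD_20140101.csv", "GBPUSD_20140102.csv"]
de_dup_csv = list(set([d[7:-4] for d in csv_files]))
de_dup_csv.sort()
print(de_dup_csv)  # ['20140101', '20140102']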

+    def _open_convert_csv_files_for_day(self, date_str):
        """
        Opens the CSV files from the data directory, converting
        them into pandas DataFrames within a pairs dictionary.
@@ -109,13 +136,24 @@ def _open_convert_csv_files(self):
        in a chronological fashion.
        """
        for p in self.pairs:
-            pair_path = os.path.join(self.csv_dir, '%s.csv' % p)
+            pair_path = os.path.join(self.csv_dir, '%s_%s.csv' % (p, date_str))
            self.pair_frames[p] = pd.io.parsers.read_csv(
-                pair_path, header=True, index_col=0, parse_dates=True,
+                pair_path, header=True, index_col=0,
+                parse_dates=True, dayfirst=True,
                names=("Time", "Ask", "Bid", "AskVolume", "BidVolume")
            )
            self.pair_frames[p]["Pair"] = p
-        self.all_pairs = pd.concat(self.pair_frames.values()).sort().iterrows()
+        return pd.concat(self.pair_frames.values()).sort().iterrows()
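The method reads one CSV per pair for the given day, tags each row with its pair, then concatenates the frames and sorts on the time index so ticks from all pairs come out of the iterator in chronological order. The self-contained sketch below is not part of this commit: it builds the frames in memory instead of reading files from disk, and uses sort_index(), since DataFrame.sort() has been removed from recent pandas.

# Sketch of merging per-pair tick frames into one time-ordered iterator.
# The in-memory frames stand in for the per-day CSV files on disk.
import pandas as pd

pair_frames = {}
pair_frames["GBPUSD"] = pd.DataFrame(
    {"Ask": [1.5001, 1.5003], "Bid": [1.5000, 1.5002]},
    index=pd.to_datetime(["2014-01-01 00:00:01", "2014-01-01 00:00:03"]),
)
pair_frames["EURUSD"] = pd.DataFrame(
    {"Ask": [1.3601, 1.3603], "Bid": [1.3600, 1.3602]},
    index=pd.to_datetime(["2014-01-01 00:00:02", "2014-01-01 00:00:04"]),
)
for p, frame in pair_frames.items():
    frame["Pair"] = p

# Concatenate and sort on the time index, then iterate tick by tick.
all_pairs = pd.concat(pair_frames.values()).sort_index().iterrows()
for index, row in all_pairs:
    print(index, row["Pair"], row["Bid"], row["Ask"])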

+    def _update_csv_for_day(self):
+        try:
+            dt = self.file_dates[self.cur_date_idx+1]
+        except IndexError:  # End of file dates
+            return False
+        else:
+            self.cur_date_pairs = self._open_convert_csv_files_for_day(dt)
+            self.cur_date_idx += 1
+            return True

    def stream_next_tick(self):
        """
@@ -130,30 +168,35 @@ def stream_next_tick(self):
        well as updating the current bid/ask and inverse bid/ask.
        """
        try:
-            index, row = next(self.all_pairs)
+            index, row = next(self.cur_date_pairs)
        except StopIteration:
-            return
-        else:
-            getcontext().rounding = ROUND_HALF_DOWN
-            pair = row["Pair"]
-            bid = Decimal(str(row["Bid"])).quantize(
-                Decimal("0.00001")
-            )
-            ask = Decimal(str(row["Ask"])).quantize(
-                Decimal("0.00001")
-            )
+            # End of the current days data
+            if self._update_csv_for_day():
+                index, row = next(self.cur_date_pairs)
+            else:  # End of the data
+                self.continue_backtest = False
+                return

+        getcontext().rounding = ROUND_HALF_DOWN
+        pair = row["Pair"]
+        bid = Decimal(str(row["Bid"])).quantize(
+            Decimal("0.00001")
+        )
+        ask = Decimal(str(row["Ask"])).quantize(
+            Decimal("0.00001")
+        )

-            # Create decimalised prices for traded pair
-            self.prices[pair]["bid"] = bid
-            self.prices[pair]["ask"] = ask
-            self.prices[pair]["time"] = index
+        # Create decimalised prices for traded pair
+        self.prices[pair]["bid"] = bid
+        self.prices[pair]["ask"] = ask
+        self.prices[pair]["time"] = index

-            # Create decimalised prices for inverted pair
-            inv_pair, inv_bid, inv_ask = self.invert_prices(pair, bid, ask)
-            self.prices[inv_pair]["bid"] = inv_bid
-            self.prices[inv_pair]["ask"] = inv_ask
-            self.prices[inv_pair]["time"] = index
+        # Create decimalised prices for inverted pair
+        inv_pair, inv_bid, inv_ask = self.invert_prices(pair, bid, ask)
+        self.prices[inv_pair]["bid"] = inv_bid
+        self.prices[inv_pair]["ask"] = inv_ask
+        self.prices[inv_pair]["time"] = index

-            # Create the tick event for the queue
-            tev = TickEvent(pair, index, bid, ask)
-            self.events_queue.put(tev)
+        # Create the tick event for the queue
+        tev = TickEvent(pair, index, bid, ask)
+        self.events_queue.put(tev)
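stream_next_tick quantises the raw bid/ask floats to five decimal places with Decimal before they reach the rest of the system, and also stores prices for the inverted pair. The standalone sketch below is not part of this commit; the inversion shown uses plain reciprocals with bid and ask swapped, which is an assumption about what invert_prices does rather than a copy of it.

# Sketch of Decimal price handling; the reciprocal-with-swap inversion
# is an assumption, not the actual qsforex invert_prices method.
from decimal import Decimal, getcontext, ROUND_HALF_DOWN

getcontext().rounding = ROUND_HALF_DOWN
FIVE_DP = Decimal("0.00001")

raw_bid, raw_ask = 1.50123456, 1.50125678   # example GBPUSD quotes

bid = Decimal(str(raw_bid)).quantize(FIVE_DP)
ask = Decimal(str(raw_ask)).quantize(FIVE_DP)

# One convention for the inverted pair (GBPUSD -> USDGBP): reciprocal
# prices with bid and ask swapped so that inv_bid <= inv_ask still holds.
inv_bid = (Decimal("1.0") / ask).quantize(FIVE_DP)
inv_ask = (Decimal("1.0") / bid).quantize(FIVE_DP)

print(bid, ask)          # 1.50123 1.50126
print(inv_bid, inv_ask)  # 0.66611 0.66612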
6 changes: 3 additions & 3 deletions performance/performance.py
@@ -26,7 +26,7 @@ def create_drawdowns(pnl):

    # Loop over the index range
    for t in range(1, len(idx)):
-        hwm.append(max(hwm[t-1], pnl[t]))
-        drawdown[t]= (hwm[t]-pnl[t])
-        duration[t]= (0 if drawdown[t] == 0 else duration[t-1]+1)
+        hwm.append(max(hwm[t-1], pnl.ix[t]))
+        drawdown.ix[t]= (hwm[t]-pnl.ix[t])
+        duration.ix[t]= (0 if drawdown.ix[t] == 0 else duration.ix[t-1]+1)
    return drawdown, drawdown.max(), duration.max()
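The change switches the loop to pnl.ix so the Series can carry a timestamp index while still being addressed by position. Series.ix has since been removed from pandas, so the sketch below, which is not part of this commit, writes the same high-water-mark/drawdown/duration loop with .iloc, assuming purely positional access is what was intended; create_drawdowns here is a self-contained reimplementation of just this loop.

# Self-contained drawdown/duration loop using .iloc in place of the
# removed .ix, assuming purely positional access was intended.
import pandas as pd


def create_drawdowns(pnl):
    idx = pnl.index
    hwm = [0]
    drawdown = pd.Series(index=idx, dtype=float)
    duration = pd.Series(index=idx, dtype=float)
    drawdown.iloc[0] = 0.0
    duration.iloc[0] = 0.0
    for t in range(1, len(idx)):
        hwm.append(max(hwm[t-1], pnl.iloc[t]))
        drawdown.iloc[t] = hwm[t] - pnl.iloc[t]
        duration.iloc[t] = 0 if drawdown.iloc[t] == 0 else duration.iloc[t-1] + 1
    return drawdown, drawdown.max(), duration.max()


if __name__ == "__main__":
    equity = pd.Series([1.0, 1.5, 1.25, 1.75])
    dd, max_dd, dd_duration = create_drawdowns(equity)
    print(max_dd, dd_duration)  # 0.25 1.0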
35 changes: 35 additions & 0 deletions scripts/test_performance.py
@@ -0,0 +1,35 @@
"""
This is a small helper script written to help debug issues
with performance calculation, that avoids having to re-run
the full backtest.
In this case it simply works off the "backtest.csv" file that
is produced from a backtest.py run.
"""


import os

import pandas as pd

from qsforex.performance.performance import create_drawdowns
from qsforex.settings import OUTPUT_RESULTS_DIR


if __name__ == "__main__":
    in_filename = "backtest.csv"
    out_filename = "equity.csv"
    in_file = os.path.join(OUTPUT_RESULTS_DIR, in_filename)
    out_file = os.path.join(OUTPUT_RESULTS_DIR, out_filename)

    # Create equity curve dataframe
    df = pd.read_csv(in_file, index_col=0)
    df.dropna(inplace=True)
    df["Total"] = df.sum(axis=1)
    df["Returns"] = df["Total"].pct_change()
    df["Equity"] = (1.0+df["Returns"]).cumprod()

    # Create drawdown statistics
    drawdown, max_dd, dd_duration = create_drawdowns(df["Equity"])
    df["Drawdown"] = drawdown
    df.to_csv(out_file, index=True)
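The helper builds the equity curve by summing the per-pair balance columns into Total, taking percentage returns, and compounding them, so Equity at step t is the running product of (1 + r_i). A tiny worked example of that pct_change/cumprod chain, not part of this commit; the column names and numbers are made up.

# Worked example of the Total -> Returns -> Equity chain used above.
import pandas as pd

df = pd.DataFrame({
    "GBPUSD": [100000.0, 100500.0, 100250.0],
    "USDGBP": [0.0, 0.0, 0.0],
})
df["Total"] = df.sum(axis=1)                    # 100000, 100500, 100250
df["Returns"] = df["Total"].pct_change()        # NaN, 0.005, -0.00248756...
df["Equity"] = (1.0 + df["Returns"]).cumprod()  # NaN, 1.005, 1.0025
print(df[["Total", "Returns", "Equity"]])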
