From 2faec67ecff90becb26e2defb59dcbc23a8ecfee Mon Sep 17 00:00:00 2001
From: Chad A Simmons <chad.simmons@member.fsf.org>
Date: Thu, 21 Mar 2024 20:25:56 -0400
Subject: [PATCH] Update read_csv to use uint16 for quantity and
 CategoricalDtype for symbol and display_name to save memory.

---
 AnalyzeLots.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/AnalyzeLots.py b/AnalyzeLots.py
index 5f12bd1..9efcb23 100755
--- a/AnalyzeLots.py
+++ b/AnalyzeLots.py
@@ -13,9 +13,10 @@ def main() -> None:
     pd.options.mode.copy_on_write = True
     if args.file.is_file():
         data: pd.DataFrame = pd.read_csv(args.file, header=1, low_memory=False, memory_map=True, parse_dates=['date'],
-                                         dtype={'quantity': np.uint32, 'cost': np.float32, 'value': np.float32,
-                                                'gain': np.float32},
-                                         names=['symbol', 'display_name', 'date', 'cost', 'quantity', 'value', 'gain'])
+                                         names=['symbol', 'display_name', 'date', 'cost', 'quantity', 'value', 'gain'],
+                                         dtype={'quantity': np.uint16, 'cost': np.float32, 'value': np.float32,
+                                                'gain': np.float32, 'symbol': pd.CategoricalDtype(),
+                                                'display_name': pd.CategoricalDtype()})
         is_short: pd.Series[bool] = data['date'] > (datetime.now() - timedelta(days=(365 - args.days)))
         is_long: pd.Series[bool] = ~is_short
         if args.live: