forked from asavinov/intelligent-trading-bot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
download_yahoo.py
90 lines (63 loc) · 2.76 KB
/
download_yahoo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from datetime import datetime, date, timedelta
import click
import yfinance as yf
from service.App import *
"""
Download quotes from Yahoo
"""
def _download_quotes(quote, start, time_column):
    """
    Download quotes of one symbol from Yahoo and normalize the column layout.

    The adjusted close price (if present) replaces the plain close price, all
    column names are lower-cased, and the date column is converted to
    ``datetime.date`` values and renamed to ``time_column``.

    :param quote: Symbol name passed to ``yf.download``
    :param start: First date to download
    :param time_column: Name the date column gets in the returned frame
    :return: Data frame with one row per downloaded date
    """
    # === Download from the remote server
    frame = yf.download(quote, start=start)
    frame = frame.reset_index()

    # NOTE(review): newer yfinance releases appear to return two-level columns (price, ticker)
    # even for a single symbol. Keep only the price level, otherwise `.str.lower()` below fails.
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = frame.columns.get_level_values(0)

    frame['Date'] = pd.to_datetime(frame['Date'], format="ISO8601").dt.date

    # Use the adjusted close as the close price. If the download has no separate 'Adj Close'
    # column (prices already adjusted) then keep 'Close' instead of deleting the only close price.
    if 'Adj Close' in frame.columns:
        del frame['Close']
        frame = frame.rename({'Adj Close': 'Close'}, axis=1)

    # Lower-case first and rename afterwards so that the configured time column name is kept
    # exactly as configured (it is used later for de-duplication and sorting)
    frame.columns = frame.columns.str.lower()
    frame = frame.rename({'date': time_column}, axis=1)

    return frame


@click.command()
@click.option('--config_file', '-c', type=click.Path(), default='', help='Configuration file name')
def main(config_file):
    """
    Download quotes from Yahoo for every configured data source and store them as CSV files.

    For each entry of ``App.config["data_sources"]`` the ``folder`` value is used both as
    the symbol to download and as the sub-folder of ``App.config["data_folder"]``. The
    optional ``file`` value (default: the symbol name) is the CSV file name. If the CSV
    file already exists and has rows, only the data starting 5 days before its last stored
    date is downloaded and merged into it. Otherwise everything since 1990-01-01 is fetched.

    :param config_file: Configuration file name passed to ``load_config``
    :return: Data frame stored for the last processed data source, or None if no data
        source was processed
    """
    load_config(config_file)

    time_column = App.config["time_column"]
    data_path = Path(App.config["data_folder"])

    now = datetime.now()

    # Stays None if there is nothing to process so that the final return cannot hit an unbound name
    df = None

    for ds in App.config["data_sources"]:
        # Assumption: folder name is equal to the symbol name we want to download
        quote = ds.get("folder")
        if not quote:
            print("ERROR. Folder is not specified.")
            continue

        # If file name is not specified (or empty) then use symbol name as file name
        file = ds.get("file") or quote

        print(f"Start downloading '{quote}' ...")

        file_path = data_path / quote
        file_path.mkdir(parents=True, exist_ok=True)  # Ensure that folder exists

        file_name = (file_path / file).with_suffix(".csv")

        # Load previously stored quotes (if any) to continue from their last date
        stored_df = None
        if file_name.is_file():
            stored_df = pd.read_csv(file_name, parse_dates=[time_column], date_format="ISO8601")
            if stored_df.empty:
                # No last date to continue from, so treat it like a missing file
                print(f"File '{file_name}' has no rows. Full fetch...")
                stored_df = None
        else:
            print("File not found. Full fetch...")

        if stored_df is not None:
            stored_df[time_column] = stored_df[time_column].dt.date
            last_date = stored_df[time_column].iloc[-1]

            # Download somewhat more than we need. Overlapping dates are resolved below
            new_df = _download_quotes(quote, last_date - timedelta(days=5), time_column)

            df = pd.concat([stored_df, new_df])
            # For overlapping dates the freshly downloaded row wins
            df = df.drop_duplicates(subset=[time_column], keep="last")
        else:
            df = _download_quotes(quote, date(1990, 1, 1), time_column)
            print("Full fetch finished.")

        df = df.sort_values(by=time_column)

        df.to_csv(file_name, index=False)
        print(f"Stored in '{file_name}'")

    elapsed = datetime.now() - now
    print(f"Finished downloading data in {str(elapsed).split('.')[0]}")

    return df
# Start the download only when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
    main()