-
Notifications
You must be signed in to change notification settings - Fork 119
/
main.py
158 lines (116 loc) · 5.82 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import streamlit as st
from streamlit_chat import message as st_message
from dotenv import load_dotenv
from chat_utils import create_and_save_directory_embeddings
from streamlit_app_utils import process_summarize_button, generate_answer, load_db_from_file_and_create_if_not_exists, validate_api_key, load_dir_chat_embeddings
from summary_utils import transcript_loader
import pandas as pd
import glob
# YouTube stuff is kinda broken! I'll fix it soon.

# Load environment variables from the local 'test.env' file.
# NOTE(review): presumably this supplies the API key(s) used by the helper
# modules imported above - confirm against those modules.
load_dotenv('test.env')

# Page configuration is issued before any other Streamlit command in the script.
st.set_page_config(page_title='BriefGPT')

# File extensions offered in the document pickers; each page filters the
# 'documents' folder with str.endswith(tuple(accepted_filetypes)).
accepted_filetypes = ['.txt', '.pdf', '.epub']
def summarize():
    """
    Render the "Summarize" page of the Streamlit app.

    The user picks either a file from the local 'documents' folder or a
    YouTube URL, selects the summarization options, and presses the button;
    the selection is then handed to process_summarize_button.

    If no supported document is available (including when the 'documents'
    folder does not exist) a hint is shown and the page stops rendering.
    Pressing the button with an empty YouTube URL shows a warning instead of
    calling transcript_loader with an empty string.

    :return: None.
    """
    st.title("Summarize")
    st.write("Summaries are saved to the 'summaries' folder in the project directory.")
    input_method = st.radio("Select input method", ('Document', 'YouTube URL'))

    # Bind both inputs up front so the button handler never touches an
    # unassigned name, whichever input method is active.
    selected_file_path = None
    youtube_url = ''

    if input_method == 'Document':
        directory = 'documents'
        # A missing folder is treated the same as an empty one: the user gets
        # the "add some documents" hint rather than a traceback.
        try:
            files = os.listdir(directory)
        except FileNotFoundError:
            files = []
        files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
        if not files:
            st.write('No documents found in documents folder. Add some documents first!')
            return
        selected_file = st.selectbox('Select a file', files)
        st.write('You selected: ' + selected_file)
        selected_file_path = os.path.join(directory, selected_file)
    elif input_method == 'YouTube URL':
        youtube_url = st.text_input("Enter a YouTube URL to summarize")

    use_gpt_4 = st.checkbox("Use GPT-4 for the final prompt (STRONGLY recommended, requires GPT-4 API access - progress bar will appear to get stuck as GPT-4 is slow)", value=True)
    find_clusters = st.checkbox('Optimal clustering (saves on tokens)', value=False)

    if st.button('Summarize (click once and wait)'):
        if input_method == 'Document':
            process_summarize_button(selected_file_path, use_gpt_4, find_clusters)
        elif not youtube_url.strip():
            # Nothing to fetch a transcript for; ask for input instead of
            # passing an empty URL to the loader.
            st.warning('Enter a YouTube URL before summarizing.')
        else:
            doc = transcript_loader(youtube_url)
            process_summarize_button(doc, use_gpt_4, find_clusters, file=False)
def chat():
    """
    Render the "Chat" page of the Streamlit app.

    The user chooses a model and a source to chat with:

    * 'Document'  - a single file from the local 'documents' folder, loaded
      via load_db_from_file_and_create_if_not_exists.
    * 'Directory' - either previously saved embeddings ('.faiss' files in
      'directory_embeddings', loaded via load_dir_chat_embeddings) or a new
      directory embedded via create_and_save_directory_embeddings.

    The loaded database is kept in st.session_state.db; loading a new source
    resets st.session_state.history. When there is nothing to select (no
    supported documents / no saved embeddings) a hint is shown instead of the
    picker, because st.selectbox yields None for an empty option list and the
    code that follows needs a real file name.

    :return: None.
    """
    dir_or_doc = st.radio('Select a chat method', ('Document', 'Directory'))
    st.title('Chat')
    model_name = st.radio('Select a model', ('gpt-3.5-turbo', 'gpt-4'))

    # The question box at the bottom is bound to this session key; seed it
    # once so the key always exists, regardless of the chat method.
    if 'text_input' not in st.session_state:
        st.session_state.text_input = ''

    if dir_or_doc == 'Document':
        directory = 'documents'
        files = os.listdir(directory)
        files = [file for file in files if file.endswith(tuple(accepted_filetypes))]
        if files:
            selected_file = st.selectbox('Select a file', files)
            st.write('You selected: ' + selected_file)
            selected_file_path = os.path.join(directory, selected_file)
            if st.button('Load file (first time might take a second...) pressing this button will reset the chat history'):
                db = load_db_from_file_and_create_if_not_exists(selected_file_path)
                st.session_state.db = db
                st.session_state.history = []
        else:
            # Same hint the Summarize page shows for an empty folder.
            st.write('No documents found in documents folder. Add some documents first!')
    else:
        load_or_create = st.checkbox('Load from existing directory (already embedded)', value=False)
        if load_or_create:
            embeddings = os.listdir('directory_embeddings')
            embeddings = [file for file in embeddings if file.endswith('.faiss')]
            if embeddings:
                select_embedding = st.selectbox('Select an embedding', embeddings)
                if st.button('Load embeddings'):
                    embedding_file_path = os.path.join('directory_embeddings', select_embedding)
                    db = load_dir_chat_embeddings(embedding_file_path)
                    st.session_state.db = db
                    st.session_state.history = []
            else:
                st.write('No saved embeddings found in directory_embeddings folder. Embed a directory first!')
        else:
            directory = st.text_input('Enter a directory to load from - just "documents" will load the default documents folder')
            name = st.text_input('Enter a unique nickname for the directory')
            if st.button('Load directory (first time might take a second...) pressing this button will reset the chat history'):
                with st.spinner('Loading directory...'):
                    db = create_and_save_directory_embeddings(directory, name)
                    st.session_state.db = db
                st.success('Directory loaded successfully')
                st.session_state.history = []

    # The widget stores the question under st.session_state.text_input; its
    # return value is not needed here.
    st.text_input('Enter your question', key='text_input')

    # The button is evaluated first so it is always rendered; the answer is
    # only generated once a database is loaded and the API key validates.
    if st.button('Ask') and 'db' in st.session_state and validate_api_key(model_name):
        # NOTE(review): the return value is unused; the loops below render
        # st.session_state.history / sources, so generate_answer presumably
        # records its results there - confirm in streamlit_app_utils.
        generate_answer(st.session_state.db, model_name)

    if 'history' not in st.session_state:
        st.session_state.history = []
    if 'sources' not in st.session_state:
        st.session_state.sources = []

    # Each history entry is a dict of keyword arguments for st_message.
    for i, entry in enumerate(st.session_state.history):
        st_message(**entry, key=str(i))
    for source in st.session_state.sources:
        with st.expander('Sources', expanded=False):
            st.markdown(source)
def documents():
    """
    Render the "Documents" page: a table of the supported files found in the
    'documents' folder, or a hint when there are none.

    :return: None.
    """
    st.title('Documents')
    st.markdown('Documents are stored in the documents folder in the project directory.')

    supported_suffixes = tuple(accepted_filetypes)
    doc_names = [entry for entry in os.listdir('documents') if entry.endswith(supported_suffixes)]

    if not doc_names:
        st.write('No documents found in documents folder. Add some documents first!')
        return

    # Number the rows from 1 so the table reads like a simple list.
    row_numbers = range(1, len(doc_names) + 1)
    listing = pd.DataFrame(doc_names, columns=['File Name'], index=row_numbers)
    st.dataframe(listing, width=1000)
# Registry of sidebar labels -> page-rendering functions, in display order.
PAGES = dict(Chat=chat, Summarize=summarize, Documents=documents)

# Sidebar: page selector plus contact links.
st.sidebar.title("Navigation")
selection = st.sidebar.radio("Go to", list(PAGES))
st.sidebar.markdown(' [Contact author](mailto:ethanujohnston@gmail.com)')
st.sidebar.markdown(' [Github](https://github.com/e-johnstonn/docGPT)')

# Render whichever page is currently selected.
page = PAGES[selection]
page()