-
Notifications
You must be signed in to change notification settings - Fork 0
/
mboxreader.py
141 lines (108 loc) · 4.77 KB
/
mboxreader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
"""
MBox-Reader
This Python script analyzes an mbox file, exported from GMail, to find
your most common email senders. I wrote this to clear out my GMail storage
space as it was hitting 50%, deleting promotional email newsletters.
Brian Lam
"""
from collections import defaultdict
import os.path
from tkinter import *
from tkinter import filedialog
from tkinter import messagebox
from tkinter import ttk
import heapq
import webbrowser
class MBoxReader():
    """Small tkinter tool that ranks the senders found in an mbox file.

    The file is scanned line by line for ``From: `` headers, the number of
    messages per sender is tallied, and every sender whose count is above
    ``resultCutoff`` is listed in the window, most frequent first.

    Attributes:
        senders: maps the raw ``From:`` header value to its message count.
        resultHeap: list of ``(count, sender)`` tuples above the cutoff.
            After ``processResults`` it is sorted in descending order
            (a descending list also satisfies the max-heap invariant).
        mboxFilePath: path of the mbox file chosen by the user.
        resultCutoff: a sender is only reported when its count is strictly
            greater than this value.
    """

    def __init__(self):
        """Create empty tallies, then build the window and run it.

        The constructor does not return until the window is closed, because
        ``initializeGraphicalUserInterface`` enters the Tk main loop.
        """
        # Raw "From:" value -> number of messages seen from that sender.
        self.senders = defaultdict(int)
        # (count, sender) pairs for the senders that are above the cutoff.
        self.resultHeap = []
        # Filled in by the file dialog; empty until the user picks a file.
        self.mboxFilePath = ""
        self.resultCutoff = 50
        self.initializeGraphicalUserInterface()

    def processLine(self, line):
        """Count ``line`` towards its sender if it is a ``From: `` header.

        Args:
            line: one line of the mbox file, including its line ending.
        """
        prefix = "From: "
        if line.startswith(prefix):
            # Drop the header name and the surrounding whitespace/newline.
            self.senders[line[len(prefix):].strip()] += 1

    def processSender(self, sender, count):
        """Record ``sender`` as a result when ``count`` exceeds the cutoff.

        Args:
            sender: raw ``From:`` header value.
            count: number of messages tallied for that sender.
        """
        if count > self.resultCutoff:
            self.resultHeap.append((count, sender))

    def processResults(self):
        """Rebuild ``resultHeap`` from ``senders``, most frequent first."""
        # Start from scratch so calling this twice cannot duplicate entries.
        self.resultHeap.clear()
        for sender, count in self.senders.items():
            self.processSender(sender, count)
        # A plain descending sort replaces the private heapq max-heap helpers
        # and yields the same order that repeated max-pops would produce.
        self.resultHeap.sort(reverse=True)

    @staticmethod
    def _extractAddress(sender):
        """Return the address between ``<`` and ``>``, or the whole value.

        Headers without an angle-bracket part (for example a bare
        ``user@example.com``) are returned stripped instead of being sliced,
        which previously cut off their last character.
        """
        start = sender.find("<")
        end = sender.rfind(">")
        if start == -1 or end == -1 or end < start:
            return sender.strip()
        return sender[start + 1:end]

    def outputResults(self):
        """Replace the instruction widgets with the list of top senders.

        ``resultHeap`` is emptied once its entries have been displayed.
        """
        self.step1Label.grid_remove()
        self.step2Label.grid_remove()
        self.fileButton.grid_remove()

        # NOTE(review): the "hand2" cursor is kept from the original even
        # though no click handler is bound to this label.
        step4Label = ttk.Label(self.content, text="Results", cursor="hand2", wraplength=350)
        step4Label.grid(column=0, row=0, columnspan=3, rowspan=2, padx=5, pady=5)
        resultsText = Text(self.content, width=60, height=30)
        resultsText.grid(column=0, row=1, columnspan=3, rowspan=2, padx=5, pady=5)

        # Sort here as well so the display order does not depend on how the
        # list was prepared by the caller.
        for count, sender in sorted(self.resultHeap, reverse=True):
            address = self._extractAddress(sender)
            resultsText.insert(END, address + " - " + str(count) + "\n")
        self.resultHeap.clear()

    def _tallyFile(self, path):
        """Feed every line of the file at ``path`` to ``processLine``.

        Undecodable bytes are replaced rather than raising, since a mailbox
        may contain message bodies that are not valid UTF-8.
        """
        with open(path, encoding="utf8", errors="replace") as mboxFile:
            # An mbox file can be large, so only one line is held at a time.
            for line in mboxFile:
                self.processLine(line)

    def readFile(self):
        """Tally, rank and display the senders of ``mboxFilePath``.

        Does nothing when the path does not point to an existing file, which
        is also the case when the file dialog was cancelled.
        """
        if not os.path.isfile(self.mboxFilePath):
            return
        self._tallyFile(self.mboxFilePath)
        self.processResults()
        self.outputResults()

    def openBrowserToDownloadMbox(self, event):
        """Open the Google Takeout page in the web browser.

        Args:
            event: the Tk click event; it is not used.
        """
        webbrowser.open_new("https://takeout.google.com/settings/takeout")

    # ------------------------------------------------------------------
    # GUI setup
    # ------------------------------------------------------------------

    def browseMboxPath(self):
        """Ask the user for an mbox file and process the selection."""
        chosen = filedialog.askopenfilename(
            initialdir="/",
            title="Select MBox file",
            filetypes=[("Mailbox Archive", "*.mbox")],
        )
        # A cancelled dialog may return an empty string or an empty tuple;
        # normalise both to "" so readFile() simply does nothing.
        self.mboxFilePath = chosen or ""
        self.readFile()

    def initializeGraphicalUserInterface(self):
        """Build the main window and run the Tk main loop until it closes."""
        root = Tk()
        root.title("GMail Clutter Finder")

        # Setup frames
        self.content = ttk.Frame(root)
        self.mainframe = ttk.Frame(root, padding="3 3 12 12")
        self.mainframe.columnconfigure(0, weight=1)
        self.mainframe.rowconfigure(0, weight=1)

        # Setup widgets
        self.step1Label = ttk.Label(
            self.content,
            text=(
                "Step 1: Click here to download your GMail inbox (.mbox). "
                "Make sure only \"Mail\" is selected."
            ),
            cursor="hand2",
            wraplength=350,
        )
        self.step1Label.bind("<Button-1>", self.openBrowserToDownloadMbox)
        self.step2Label = ttk.Label(
            self.content,
            text=(
                "Step 2: Got your mbox file? Cool, let's find it. "
                "It's in that zip file you got, under Takeout\\Mail."
            ),
            wraplength=350,
        )
        self.fileButton = Button(root, text="Find your .mbox file", command=self.browseMboxPath)

        # Place widgets (grid only; the labels live inside self.content)
        self.content.grid(column=0, row=0)
        self.step1Label.grid(column=0, row=0, padx=5, pady=5)
        self.step2Label.grid(column=0, row=5, padx=5, pady=5)
        self.fileButton.grid(column=0, row=7)

        root.mainloop()
"""
Program start
"""
if __name__ == "__main__":
    # Constructing the reader builds the window and blocks in the Tk main
    # loop, so nothing else is needed here.
    MBoxReader()