Skip to content

Commit

Permalink
Revision 9 Socket handling
Browse files — browse the repository at this point in the history
  • Loading branch information
mknithin committed Nov 2, 2015
1 parent 9d23130 commit dd08e02
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions cannes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,7 +53,9 @@ def get_company_from_file(self,file_name):
def get_company_from_google(self,company_list):
#link=[]
#loc_list=['"MCFIVA (THAILAND) CO.,LTD."','"MIR" INTERGOVERNMENTAL TV AND RADIO.']
black_list=['http://www.imdb.com','https://www.facebook.com',' http://www.youtube.com/','https://www.linkedin.com/',' https://en.wikipedia.org']
for cmpn in company_list:
print "Searching emails for : %s" %cmpn
query = urllib.urlencode({'q': cmpn})
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % query
search_response = urllib.urlopen(url)
Expand All @@ -66,6 +68,8 @@ def get_company_from_google(self,company_list):
#print h['url']
#link.append((h['url']).encode("utf-8"))
link=(h['url']).encode("utf-8")
if link in black_list:
continue
print link
email=self.get_email_from_link(link,self.depth)
self.put_email_to_file(email)
Expand All @@ -76,12 +80,15 @@ def get_email_from_link(self,link,depth):
print "Extracting emails >>>>>>"
emails = defaultdict(int)
for url in e.crawl_site('%s' %link, depth):
for email in e.grab_email(e.urltext(url)):
if not emails.has_key(email):
if('reedmidem.com' in email):
continue
else:
email_link.append(email)
try:
for email in e.grab_email(e.urltext(url)):
if not emails.has_key(email):
if('reedmidem.com' in email):
continue
else:
email_link.append(email)
except:
continue
return email_link

def put_email_to_file(self,email):
Expand Down

0 comments on commit dd08e02

Please sign in to comment.