From dd08e02235177c61679e885191c77d8ee1f12997 Mon Sep 17 00:00:00 2001
From: nithin-mk <desirenithinmk@gmail.com>
Date: Mon, 2 Nov 2015 20:04:30 +0530
Subject: [PATCH] Revision 9 Socket handling

---
 cannes.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/cannes.py b/cannes.py
index 3ce94aa..d61aa96 100755
--- a/cannes.py
+++ b/cannes.py
@@ -53,7 +53,9 @@ def get_company_from_file(self,file_name):
 	def get_company_from_google(self,company_list):
 		#link=[]
 		#loc_list=['"MCFIVA (THAILAND) CO.,LTD."','"MIR" INTERGOVERNMENTAL TV AND RADIO.']
+		black_list=['http://www.imdb.com','https://www.facebook.com',' http://www.youtube.com/','https://www.linkedin.com/',' https://en.wikipedia.org']
 		for cmpn in company_list:
+			print "Searching emails for : %s" %cmpn
 			query = urllib.urlencode({'q': cmpn})
   			url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % query
   			search_response = urllib.urlopen(url)
@@ -66,6 +68,8 @@ def get_company_from_google(self,company_list):
   					#print h['url']	
 					#link.append((h['url']).encode("utf-8"))
 					link=(h['url']).encode("utf-8")
+					if link in black_list:
+						continue
 					print link
 					email=self.get_email_from_link(link,self.depth)
 					self.put_email_to_file(email)
@@ -76,12 +80,15 @@ def get_email_from_link(self,link,depth):
 		print "Extracting emails >>>>>>"
 		emails = defaultdict(int)
 		for url in e.crawl_site('%s' %link, depth):
-			for email in e.grab_email(e.urltext(url)):
-				if not emails.has_key(email):
-					if('reedmidem.com' in email):
-						continue
-					else:
-						email_link.append(email)
+			try:
+				for email in e.grab_email(e.urltext(url)):
+					if not emails.has_key(email):
+						if('reedmidem.com' in email):
+							continue
+						else:
+							email_link.append(email)
+			except:
+				continue
 		return email_link
 
 	def put_email_to_file(self,email):