Skip to content

Commit

Permalink
Revision 12 Mip track number
Browse files (browse the repository at this point in the history)
  • Loading branch information
mknithin committed Nov 3, 2015
1 parent f3bc27f commit 77ac75a
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions mip.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,12 +9,13 @@
import os.path #for output file deletion

class MipCrawler:
def __init__(self,starting_url,depth):
def __init__(self,starting_url,depth,start_record):
self.starting_url=starting_url
self.depth=depth
self.companies=[]
self.company_link=[]
self.email_out=[]
self.start_record=start_record

def crawl(self):

Expand Down Expand Up @@ -58,7 +59,7 @@ def get_company_from_google(self,company_list):
#loc_list=['"MCFIVA (THAILAND) CO.,LTD."','"MIR" INTERGOVERNMENTAL TV AND RADIO.']
for cmpn in company_list:
print "--------------------------------------------------------"
print "Google Search for : %d.%s" %(cmpn_no,cmpn)
print "Google Search for : %d.%s(track:%d)" %(cmpn_no,cmpn,(self.start_record+(cmpn_no-1)))
query = urllib.urlencode({'q': cmpn.encode("utf-8")})
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % query
search_response = urllib.urlopen(url)
Expand Down Expand Up @@ -120,10 +121,10 @@ def __init__(self,company_name):
os.remove("output.txt")
base_url='http://www.my-mip.com/en/online-database/mipcom/companies/?rpp=64&startRecord='
batch=0
start_record=1
start_record=65
while start_record < 4609:
print "Batch:%d Start_record:%d"%((batch+1),start_record)
crawler=MipCrawler('%s' %base_url+str(start_record),10)
crawler=MipCrawler('%s' %base_url+str(start_record),10,start_record)
crawler.crawl()
batch+=1
start_record+=64

0 comments on commit 77ac75a

Please sign in to comment.