
Commit

Fix bug: ZhandayeCrawler has no headers attribute (Python3WebSpider#76)

* Fix bug: ZhandayeCrawler has no headers attribute

Fixes: AttributeError: 'ZhandayeCrawler' object has no attribute 'headers'

* Add: wait a fixed interval between retries in the getter (illustrated after the base.py diff below)
winturn committed Jul 11, 2020
1 parent 50a8f7e commit b42c77f
Showing 2 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion proxypool/crawlers/base.py
@@ -6,7 +6,7 @@
 class BaseCrawler(object):
     urls = []
 
-    @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None)
+    @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
     def fetch(self, url, **kwargs):
         try:
             response = requests.get(url, **kwargs)
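The new wait_fixed=2000 argument tells the retrying library to pause 2000 ms between attempts instead of retrying immediately. A minimal, standalone sketch of that behavior (assuming the retrying package is installed; flaky_fetch and ATTEMPT_TIMES are illustrative names, not part of the project):

import time
from retrying import retry

ATTEMPT_TIMES = []

@retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
def flaky_fetch():
    # Record each attempt; return None (a "failed" result) until the third try.
    ATTEMPT_TIMES.append(time.time())
    return None if len(ATTEMPT_TIMES) < 3 else 'html'

print(flaky_fetch())         # prints 'html' after roughly 4 seconds (two 2000 ms waits)
print(len(ATTEMPT_TIMES))    # 3 attempts in total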
11 changes: 6 additions & 5 deletions proxypool/crawlers/public/zhandaye.py
@@ -13,13 +13,14 @@ class ZhandayeCrawler(BaseCrawler):
     """
     urls = [BASE_URL.format(page=page) for page in range(1, MAX_PAGE)]
 
+    headers = {
+        'User-Agent': 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
+    }
+
     def crawl(self):
         for url in self.urls:
             logger.info(f'fetching {url}')
-            if not self.headers:
-                html = self.fetch(url)
-            else:
-                html = self.fetch(url, headers=self.headers)
+            html = self.fetch(url, headers=self.headers)
             self.parse(html)
 
     def parse(self, html):
@@ -56,4 +57,4 @@ def parse(self, html):
 if __name__ == '__main__':
     crawler = ZhandayeCrawler()
     for proxy in crawler.crawl():
-        print(proxy)
+        print(proxy)
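For context on the headers fix: the old crawl() read self.headers on a class that never defined it, so the very first lookup raised AttributeError. Defining headers as a class attribute makes the lookup succeed on every instance, and the conditional is no longer needed. A simplified, self-contained sketch of the two situations (OldCrawler and FixedCrawler are illustrative names, not the project's classes):

class OldCrawler:
    def crawl(self):
        # No 'headers' attribute exists on the instance or its class,
        # so this lookup raises AttributeError before anything is fetched.
        if not self.headers:
            return 'fetch without headers'
        return 'fetch with headers'

class FixedCrawler:
    # Class-level attribute: self.headers now resolves through the class,
    # so crawl() can always pass it along.
    headers = {'User-Agent': 'Mozilla/5.0 (example)'}

    def crawl(self):
        return f'fetch with headers={self.headers}'

try:
    OldCrawler().crawl()
except AttributeError as exc:
    print(exc)    # "'OldCrawler' object has no attribute 'headers'"

print(FixedCrawler().crawl())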
