
Commit

Fix bug: ZhandayeCrawler has no headers attribute (Python3WebSpider#76)

* Fix bug: ZhandayeCrawler has no headers attribute

Fixes: AttributeError: 'ZhandayeCrawler' object has no attribute 'headers'

* Add: wait a fixed interval between retries in the getter (illustrated after the base.py diff below)
winturn committed Jul 11, 2020
1 parent 50a8f7e commit b42c77f
Showing 2 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion proxypool/crawlers/base.py
@@ -6,7 +6,7 @@
 class BaseCrawler(object):
     urls = []
 
-    @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None)
+    @retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
     def fetch(self, url, **kwargs):
         try:
             response = requests.get(url, **kwargs)
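The new wait_fixed=2000 argument tells the retrying library to pause 2000 ms between attempts instead of retrying immediately. A minimal, standalone sketch of that behavior (assuming the retrying package is installed; flaky_fetch and ATTEMPT_TIMES are illustrative names, not part of the project):

import time
from retrying import retry

ATTEMPT_TIMES = []

@retry(stop_max_attempt_number=3, retry_on_result=lambda x: x is None, wait_fixed=2000)
def flaky_fetch():
    # Record each attempt; return None (a "failed" result) until the third try.
    ATTEMPT_TIMES.append(time.time())
    return None if len(ATTEMPT_TIMES) < 3 else 'html'

print(flaky_fetch())         # prints 'html' after roughly 4 seconds (two 2000 ms waits)
print(len(ATTEMPT_TIMES))    # 3 attempts in total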
11 changes: 6 additions & 5 deletions proxypool/crawlers/public/zhandaye.py
@@ -13,13 +13,14 @@ class ZhandayeCrawler(BaseCrawler):
     """
     urls = [BASE_URL.format(page=page) for page in range(1, MAX_PAGE)]
 
+    headers = {
+        'User-Agent': 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
+    }
+
     def crawl(self):
         for url in self.urls:
             logger.info(f'fetching {url}')
-            if not self.headers:
-                html = self.fetch(url)
-            else:
-                html = self.fetch(url, headers=self.headers)
+            html = self.fetch(url, headers=self.headers)
             self.parse(html)
 
     def parse(self, html):
@@ -56,4 +57,4 @@ def parse(self, html):
 if __name__ == '__main__':
     crawler = ZhandayeCrawler()
     for proxy in crawler.crawl():
-        print(proxy)
+        print(proxy)
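For context on the headers fix: the old crawl() read self.headers on a class that never defined it, so the very first lookup raised AttributeError. Defining headers as a class attribute makes the lookup succeed on every instance, and the conditional is no longer needed. A simplified, self-contained sketch of the two situations (OldCrawler and FixedCrawler are illustrative names, not the project's classes):

class OldCrawler:
    def crawl(self):
        # No 'headers' attribute exists on the instance or its class,
        # so this lookup raises AttributeError before anything is fetched.
        if not self.headers:
            return 'fetch without headers'
        return 'fetch with headers'

class FixedCrawler:
    # Class-level attribute: self.headers now resolves through the class,
    # so crawl() can always pass it along.
    headers = {'User-Agent': 'Mozilla/5.0 (example)'}

    def crawl(self):
        return f'fetch with headers={self.headers}'

try:
    OldCrawler().crawl()
except AttributeError as exc:
    print(exc)    # "'OldCrawler' object has no attribute 'headers'"

print(FixedCrawler().crawl())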
