forked from Python3WebSpider/ProxyPool
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Create ip89.py www.89ip.cn 免费代理 * Update ip89.py update Class name * Create fatezero_proxylist.py 增加 http://proxylist.fatezero.org/ 代理 * Create ihuan.py i幻 代理
- Loading branch information
1 parent
9912b98
commit e3bbd55
Showing
3 changed files
with
99 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from proxypool.schemas.proxy import Proxy | ||
from proxypool.crawlers.base import BaseCrawler | ||
import re | ||
import json | ||
BASE_URL = 'http://proxylist.fatezero.org/proxy.list' | ||
|
||
|
||
class FatezeroCrawler(BaseCrawler):
    """
    Fatezero crawler, http://proxylist.fatezero.org

    The endpoint returns one JSON object per line describing a proxy.
    """
    urls = [BASE_URL]

    def parse(self, html):
        """
        Parse the newline-delimited JSON proxy list and yield proxies.

        :param html: response body of proxy.list, one JSON object per line
        :return: generator of Proxy objects
        """
        for line in html.split('\n'):
            line = line.strip()
            if not line:
                # split('\n') yields a trailing empty string; json.loads('')
                # would raise, so skip blank lines instead of crashing
                continue
            try:
                info = json.loads(line)
            except json.JSONDecodeError:
                # tolerate the occasional malformed line rather than
                # aborting the whole parse
                continue
            host = info.get('host')
            port = info.get('port')
            if host and port:
                yield Proxy(host=host, port=port)
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: print every proxy the crawler yields.
    for fetched in FatezeroCrawler().crawl():
        print(fetched)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
from proxypool.schemas.proxy import Proxy | ||
from proxypool.crawlers.base import BaseCrawler | ||
import re | ||
from pyquery import PyQuery as pq | ||
import time | ||
BASE_URL = 'https://ip.ihuan.me/today/{path}.html' | ||
|
||
|
||
class IhuanCrawler(BaseCrawler):
    """
    ip ihuan crawler, https://ip.ihuan.me
    """
    # Page for the current hour, e.g. https://ip.ihuan.me/today/2020/04/27/09.html.
    # NOTE(review): evaluated once at import time, so a long-lived process keeps
    # fetching the hour the module was loaded — confirm that is acceptable.
    urls = [BASE_URL.format(path=time.strftime("%Y/%m/%d/%H", time.localtime()))]

    def parse(self, html):
        """
        Parse the hourly listing page and yield proxies.

        :param html: page HTML where entries appear before ``<br>`` tags
        :return: generator of Proxy objects
        """
        # doc = pq(html)('.text-left')
        # Raw string: '\d' / '\.' in a plain string are invalid escape
        # sequences (DeprecationWarning on modern CPython).
        pattern = re.compile(r'([\d:\.]*).*?<br>')
        for candidate in pattern.findall(html):
            parts = candidate.split(':')
            # The pattern also captures empty strings; keep only host:port pairs.
            if len(parts) == 2:
                yield Proxy(host=parts[0], port=parts[1])
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: print every proxy the crawler yields.
    for fetched in IhuanCrawler().crawl():
        print(fetched)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
from proxypool.schemas.proxy import Proxy | ||
from proxypool.crawlers.base import BaseCrawler | ||
import re | ||
|
||
MAX_NUM = 9999 | ||
BASE_URL = 'http://api.89ip.cn/tqdl.html?api=1&num={MAX_NUM}&port=&address=&isp='.format(MAX_NUM=MAX_NUM) | ||
|
||
|
||
class Ip89Crawler(BaseCrawler):
    """
    89ip crawler, http://api.89ip.cn
    """
    urls = [BASE_URL]

    def parse(self, html):
        """
        Parse the plain-text listing and yield proxies.

        :param html: response body where each entry looks like ``host:port<br>``
        :return: generator of Proxy objects
        """
        # Raw string: '\d' / '\.' in a plain string are invalid escape
        # sequences (DeprecationWarning on modern CPython).
        pattern = re.compile(r'([\d:\.]*)<br>')
        for entry in pattern.findall(html):
            pieces = entry.split(':')
            # Discard empty matches and anything that is not exactly host:port.
            if len(pieces) == 2:
                yield Proxy(host=pieces[0], port=pieces[1])
|
||
|
||
if __name__ == '__main__':
    # Manual smoke test: print every proxy the crawler yields.
    for fetched in Ip89Crawler().crawl():
        print(fetched)