Skip to content

Commit

Permalink
chore: update demo_mysql_nacos
Browse files (browse the repository at this point in the history)
  • Loading branch information
shengchenyang committed May 17, 2024
1 parent e8b3dce commit de582c8
Showing 1 changed file with 12 additions and 23 deletions.
35 changes: 12 additions & 23 deletions DemoSpider/spiders/demo_mysql_nacos.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
# 存储至 mysql 场景,配置从 nacos 获取
import configparser
import json
from typing import Any, Iterable

from ayugespidertools.items import AyuItem
[Expand All] @@ -14,8 +13,8 @@

class DemoMysqlNacosSpider(AyuSpider):
name = "demo_mysql_nacos"
allowed_domains = ["blog.csdn.net"]
start_urls = ["https://blog.csdn.net/"]
allowed_domains = ["readthedocs.io"]
start_urls = ["https://readthedocs.io"]
custom_settings = {
# 开启远程配置服务
"APP_CONF_MANAGE": True,
[Expand Down] [Expand Up] @@ -45,31 +44,21 @@ def update_settings(cls, settings: Settings) -> None:
settings.set("REMOTE_CONFIG", _remote_conf, priority="spider")

def start_requests(self) -> Iterable[Request]:
"""get 请求首页,获取项目列表数据"""
yield Request(
url="https://blog.csdn.net/phoenix/web/blog/hot-rank?page=0&pageSize=25&type=",
url="https://ayugespidertools.readthedocs.io/en/latest/",
callback=self.parse_first,
headers={
"referer": "https://blog.csdn.net/rank/list",
},
)

def parse_first(self, response: ScrapyResponse) -> Any:
data_list = json.loads(response.text)["data"]
for curr_data in data_list:
article_detail_url = curr_data.get("articleDetailUrl")
article_title = curr_data.get("articleTitle")
comment_count = curr_data.get("commentCount")
favor_count = curr_data.get("favorCount")
nick_name = curr_data.get("nickName")
li_list = response.xpath('//div[@aria-label="Navigation menu"]/ul/li')
for curr_li in li_list:
octree_text = curr_li.xpath("a/text()").get()
octree_href = curr_li.xpath("a/@href").get()

article_item = AyuItem(
article_detail_url=article_detail_url,
article_title=article_title,
comment_count=comment_count,
favor_count=favor_count,
nick_name=nick_name,
octree_item = AyuItem(
octree_text=octree_text,
octree_href=octree_href,
_table="demo_mysql_nacos",
)
self.slog.info(f"article_item: {article_item}")
yield article_item
self.slog.info(f"octree_item: {octree_item}")
yield octree_item

0 comments on commit de582c8

Please sign in to comment.