From de582c82860ccb0f84a0c562f7e0380590ef172b Mon Sep 17 00:00:00 2001 From: shengchenyang <15538221825@163.com> Date: Fri, 17 May 2024 19:29:27 +0800 Subject: [PATCH] chore: update demo_mysql_nacos --- DemoSpider/spiders/demo_mysql_nacos.py | 35 +++++++++----------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/DemoSpider/spiders/demo_mysql_nacos.py b/DemoSpider/spiders/demo_mysql_nacos.py index 58d6c1b..ec5e405 100644 --- a/DemoSpider/spiders/demo_mysql_nacos.py +++ b/DemoSpider/spiders/demo_mysql_nacos.py @@ -1,6 +1,5 @@ # 存储至 mysql 场景,配置从 nacos 获取 import configparser -import json from typing import Any, Iterable from ayugespidertools.items import AyuItem @@ -14,8 +13,8 @@ class DemoMysqlNacosSpider(AyuSpider): name = "demo_mysql_nacos" - allowed_domains = ["blog.csdn.net"] - start_urls = ["https://blog.csdn.net/"] + allowed_domains = ["readthedocs.io"] + start_urls = ["https://readthedocs.io"] custom_settings = { # 开启远程配置服务 "APP_CONF_MANAGE": True, @@ -45,31 +44,21 @@ def update_settings(cls, settings: Settings) -> None: settings.set("REMOTE_CONFIG", _remote_conf, priority="spider") def start_requests(self) -> Iterable[Request]: - """get 请求首页,获取项目列表数据""" yield Request( - url="https://blog.csdn.net/phoenix/web/blog/hot-rank?page=0&pageSize=25&type=", + url="https://ayugespidertools.readthedocs.io/en/latest/", callback=self.parse_first, - headers={ - "referer": "https://blog.csdn.net/rank/list", - }, ) def parse_first(self, response: ScrapyResponse) -> Any: - data_list = json.loads(response.text)["data"] - for curr_data in data_list: - article_detail_url = curr_data.get("articleDetailUrl") - article_title = curr_data.get("articleTitle") - comment_count = curr_data.get("commentCount") - favor_count = curr_data.get("favorCount") - nick_name = curr_data.get("nickName") + li_list = response.xpath('//div[@aria-label="Navigation menu"]/ul/li') + for curr_li in li_list: + octree_text = curr_li.xpath("a/text()").get() + octree_href = curr_li.xpath("a/@href").get() - article_item = AyuItem( - article_detail_url=article_detail_url, - article_title=article_title, - comment_count=comment_count, - favor_count=favor_count, - nick_name=nick_name, + octree_item = AyuItem( + octree_text=octree_text, + octree_href=octree_href, _table="demo_mysql_nacos", ) - self.slog.info(f"article_item: {article_item}") - yield article_item + self.slog.info(f"octree_item: {octree_item}") + yield octree_item