diff --git a/.gitignore b/.gitignore index 68bc17f..7a3d4b1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,160 +1,2 @@ -# Byte-compiled / optimized / DLL files __pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. 
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
# encoding:utf-8
import json
import os
from urllib.parse import urlparse

import requests

import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from common.log import logger
from plugins import *


@plugins.register(
    name="JinaSum",
    desire_priority=110,
    hidden=False,
    desc="Sum url link content with jina reader and llm",
    version="0.1",
    author="hanfangyuan",
)
class JinaSum(Plugin):
    """Plugin that summarizes the page behind a shared or typed URL.

    The page text is fetched through a jina reader endpoint and then sent,
    together with a summary prompt, to an OpenAI-compatible chat completions
    endpoint. The model's answer is returned as the reply.

    The class attributes below are the defaults used for any key that is
    missing from the loaded configuration.
    """

    jina_reader_base = "https://r.jina.ai"
    open_ai_api_base = "https://api.openai.com/v1"
    open_ai_api_key = ""
    open_ai_model = "gpt-3.5-turbo"
    max_words = 8000
    prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"

    # Number of additional attempts made after the first failure.
    _MAX_RETRIES = 3
    # Timeout, in seconds, passed to every outgoing HTTP request.
    _REQUEST_TIMEOUT = 60

    def __init__(self):
        """Load the configuration and register the context handler.

        Raises:
            RuntimeError: if anything goes wrong during initialization; the
                original error is attached as ``__cause__``.
        """
        super().__init__()
        try:
            self.config = super().load_config()
            if not self.config:
                self.config = self._load_config_template()
            if self.config is None:
                # Fail with a readable message instead of an AttributeError
                # on ``None.get`` further down.
                raise ValueError("neither config.json nor config.json.template could be loaded")
            self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
            self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
            self.open_ai_api_key = self.config.get("open_ai_api_key", "")
            self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
            self.max_words = self.config.get("max_words", self.max_words)
            self.prompt = self.config.get("prompt", self.prompt)
            # The API key is masked so it never ends up in log files.
            logger.info(f"[JinaSum] inited, config={self._masked_config()}")
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
        except Exception as e:
            logger.error(f"[JinaSum] 初始化异常:{e}")
            # Raising a plain string is itself a TypeError in Python 3, so
            # raise a real exception and keep the cause attached.
            raise RuntimeError("[JinaSum] init failed, ignore ") from e

    def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
        """Summarize the URL carried by the context, if there is one.

        Contexts that are neither SHARING nor TEXT, and contents that do not
        pass ``_check_url``, are left untouched. Otherwise a TEXT reply with
        the summary is stored in ``e_context["reply"]``; after the retries are
        exhausted an ERROR reply is stored instead. In both cases the action
        is set to ``EventAction.BREAK_PASS``.

        Args:
            e_context: event context providing ``"context"`` and ``"channel"``.
            retry_count: index of the first attempt; the "please wait" notice
                is only sent when the attempt index is 0.
        """
        attempt = retry_count
        while True:
            try:
                context = e_context["context"]
                if context.type not in (ContextType.SHARING, ContextType.TEXT):
                    return

                content = context.content
                if not self._check_url(content):
                    logger.debug(f"[JinaSum] {content} not a url, skip")
                    return

                if attempt == 0:
                    logger.debug("[JinaSum] on_handle_context. content: %s" % content)
                    notice = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...")
                    e_context["channel"].send(notice, context)

                # _check_url validates the stripped text, so the same stripped
                # text must be used when building the reader URL.
                page_text = self._fetch_page_text(content.strip())
                summary = self._request_summary(page_text)

                e_context["reply"] = Reply(ReplyType.TEXT, summary)
                e_context.action = EventAction.BREAK_PASS
                return
            except Exception as e:
                if attempt < self._MAX_RETRIES:
                    logger.warning(f"[JinaSum] {str(e)}, retry {attempt + 1}")
                    attempt += 1
                    continue

                logger.exception(f"[JinaSum] {str(e)}")
                e_context["reply"] = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试")
                e_context.action = EventAction.BREAK_PASS
                return

    def get_help_text(self, verbose, **kwargs):
        """Return the one-line help text of this plugin."""
        return '使用jina reader和ChatGPT总结网页链接内容'

    def _masked_config(self):
        """Return a shallow copy of the config with the API key masked."""
        masked = dict(self.config)
        if masked.get("open_ai_api_key"):
            masked["open_ai_api_key"] = "***"
        return masked

    def _fetch_page_text(self, target_url):
        """Fetch the text of ``target_url`` through the jina reader endpoint."""
        response = requests.get(self._get_jina_url(target_url), timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.text

    def _request_summary(self, target_url_content):
        """Post the page text to the chat endpoint and return the answer text."""
        openai_chat_url = self._get_openai_chat_url()
        openai_payload = self._get_openai_payload(target_url_content)
        # The headers carry the bearer token, so they are deliberately not logged.
        logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_payload: {openai_payload}")
        response = requests.post(
            openai_chat_url,
            headers=self._get_openai_headers(),
            json=openai_payload,
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']

    def _load_config_template(self):
        """Load ``config.json.template`` from the plugin directory.

        Returns:
            The parsed JSON content, or ``None`` when the file does not exist
            or cannot be read or parsed (the error is logged).
        """
        logger.debug("No JinaSum plugin config.json, use plugins/jina_sum/config.json.template")
        try:
            plugin_config_path = os.path.join(self.path, "config.json.template")
            if os.path.exists(plugin_config_path):
                with open(plugin_config_path, "r", encoding="utf-8") as f:
                    return json.load(f)
        except Exception as e:
            logger.exception(e)
        return None

    def _get_jina_url(self, target_url):
        """Build the jina reader URL for ``target_url``."""
        # Tolerate a trailing slash in the configured base.
        return self.jina_reader_base.rstrip("/") + "/" + target_url

    def _get_openai_chat_url(self):
        """Build the chat completions URL from the configured API base."""
        return self.open_ai_api_base.rstrip("/") + "/chat/completions"

    def _get_openai_headers(self):
        """Return the HTTP headers for the chat completions request."""
        return {
            'Authorization': f"Bearer {self.open_ai_api_key}",
            'Host': urlparse(self.open_ai_api_base).netloc
        }

    def _get_openai_payload(self, target_url_content):
        """Build the chat completions payload for the given page text."""
        # Truncate simply by string length to stay below the input limit.
        target_url_content = target_url_content[:self.max_words]
        sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
        messages = [{"role": "user", "content": sum_prompt}]
        payload = {
            'model': self.open_ai_model,
            'messages': messages
        }
        return payload

    def _check_url(self, target_url: str):
        """Return True when ``target_url`` looks like a URL this plugin handles.

        Non-string values are rejected. WeChat official-account card links
        (``mp.weixin.qq.com/s?__biz=``) are rejected as well, because WeChat
        validates them and they cannot be accessed directly.
        """
        if not isinstance(target_url, str):
            return False
        url = target_url.strip()
        if url.startswith(("https://mp.weixin.qq.com/s?__biz=", "http://mp.weixin.qq.com/s?__biz=")):
            return False
        # Simple check of whether the text is a URL at all.
        return url.startswith(("http://", "https://"))