├── memflow ├── __init__.py ├── common │ ├── __init__.py │ ├── customjsonencoder.py │ ├── logging.py │ └── response.py ├── tasks │ ├── __init__.py │ └── cuboxsynctask.py ├── models │ ├── __init__.py │ └── syncrecord.py ├── exceptions.py ├── memapi.py ├── utils.py ├── main.py └── databases.py ├── requirements.txt ├── app ├── conf │ └── supervisord.conf └── start.sh ├── docker-compose.yml ├── Dockerfile ├── LICENSE ├── README.md └── .gitignore /memflow/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /memflow/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /memflow/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /memflow/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .syncrecord import SyncRecord 2 | -------------------------------------------------------------------------------- /memflow/exceptions.py: -------------------------------------------------------------------------------- 1 | class CuboxErrorException(RuntimeError): 2 | def __init__(self, message): 3 | self.message = message 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | uvicorn[standard] 2 | fastapi==0.104.1 3 | tenacity==8.2.3 4 | httpx==0.25.1 5 | dataclasses-json==0.6.1 6 | SQLAlchemy==2.0.23 7 | trafilatura==1.6.2 8 | inject==5.1.0 9 | APScheduler==3.10.4 10 | trafilatura==1.6.2 11 | markdownify==0.11.6 -------------------------------------------------------------------------------- /app/conf/supervisord.conf: -------------------------------------------------------------------------------- 1 | [supervisord] 2 | nodaemon=true 3 | 4 | [program:memflow] 5 | user = memflow 6 | directory = /app 7 | command = python memflow/main.py 8 | startsecs=3 9 | autostart = true 10 | autorestart = true 11 | redirect_stderr=true 12 | stdout_logfile=/dev/fd/1 13 | stdout_logfile_maxbytes=0 -------------------------------------------------------------------------------- /memflow/common/customjsonencoder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import datetime 3 | 4 | 5 | class CustomJSONEncoder(json.JSONEncoder): 6 | def default(self, obj): 7 | if isinstance(obj, datetime.datetime): 8 | return obj.strftime("%Y-%m-%d %H:%M:%S") 9 | return super().default(obj) 10 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | services: 4 | memaiflow: 5 | restart: always 6 | image: yipengfei/memai-flow:latest 7 | container_name: memaiflow 8 | ports: 9 | - 8000:8000 10 | environment: 11 | MEM_API_KEY: '' 12 | CUBOX_AUTH_CODE: '' 13 | CUBOX_SYNC_INTERVAL: 300 -------------------------------------------------------------------------------- /app/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | user=memflow 3 | PUID=${PUID:-0} 4 | PGID=${PGID:-0} 5 | #create user if not exists 6 | if id -u $user >/dev/null 2>&1 ;then 7 | echo "$user exists" 8 | else 9 | echo "create $user(${PUID}): $user(${PGID})" 10 | useradd -U -d /data -s /bin/false $user 11 | usermod -G users $user 12 | groupmod -o -g "$PGID" $user 13 | usermod -o -u "$PUID" $user 14 | fi 15 | 16 | base_path='/app' 17 | 18 | chown -R $user:$user /app 19 | chown -R $user:$user /data 20 | 21 | supervisord -c /app/conf/supervisord.conf 22 | -------------------------------------------------------------------------------- /memflow/memapi.py: -------------------------------------------------------------------------------- 1 | import httpx 2 | from tenacity import retry, wait_random_exponential, stop_after_attempt 3 | 4 | CREATE_MEM_API = "https://api.mem.ai/v0/mems" 5 | 6 | 7 | class MemApi: 8 | def __init__(self, api_key: str): 9 | self.api_key = api_key 10 | self.headers = { 11 | "Authorization": "ApiAccessToken " + self.api_key, 12 | } 13 | 14 | @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) 15 | def create_mem(self, content: str): 16 | params = { 17 | "content": content 18 | } 19 | r = httpx.post(CREATE_MEM_API, json=params, headers=self.headers) 20 | r.raise_for_status() 21 | return r.json() 22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9.1-slim-buster 2 | VOLUME /data 3 | EXPOSE 8000 4 | ENV TZ=Asia/Shanghai \ 5 | PYTHONPATH="${PYTHONPATH}:/dependencies:/app" \ 6 | WORKDIR='/data' \ 7 | MEM_API_KEY='' \ 8 | CUBOX_AUTH_CODE='' \ 9 | CUBOX_SYNC_INTERVAL=300 10 | 11 | COPY app /app 12 | COPY memflow /app/memflow 13 | COPY requirements.txt / 14 | 15 | RUN apt-get -y update \ 16 | && apt-get install -y --no-install-recommends tzdata \ 17 | && python -m pip install --upgrade pip \ 18 | && pip install supervisor \ 19 | && pip install --target=/dependencies -r requirements.txt \ 20 | && ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime \ 21 | && echo ${TZ} > /etc/timezone \ 22 | && rm -rf /var/lib/apt/lists/* 23 | 24 | WORKDIR /app 25 | CMD sh /app/start.sh 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 yipengfei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /memflow/models/syncrecord.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from memflow.databases import * 4 | 5 | 6 | class SyncRecord(BaseDBModel): 7 | """ 8 | 存储站点用的一些Cookie 9 | """ 10 | __tablename__ = 'cookie_store' 11 | 12 | id = Column(Integer, primary_key=True, autoincrement=True, comment='id') 13 | channel = Column(String, comment='内容来源通道', nullable=False) 14 | content_id = Column(String, comment='内容的唯一编号,用于查询重复', nullable=False) 15 | mem_id = Column(String, comment='写入mem后的唯一编号', nullable=False) 16 | mem_url = Column(String, comment='写入mem后获得的访问链接', nullable=False) 17 | 18 | @staticmethod 19 | def exists(channel: str, content_id: str) -> bool: 20 | return SyncRecord.query().filter( 21 | (SyncRecord.channel == channel) & (SyncRecord.content_id == content_id)).first() is not None 22 | 23 | @staticmethod 24 | def insert(channel: str, content_id: str, mem_id: str, mem_url: str): 25 | record = SyncRecord() 26 | record.channel = channel 27 | record.content_id = content_id 28 | record.mem_id = mem_id 29 | record.mem_url = mem_url 30 | record.save() 31 | -------------------------------------------------------------------------------- /memflow/common/logging.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | LOGGING_CONFIG = { 4 | 'version': 1, 5 | 'disable_existing_loggers': False, 6 | 'formatters': { 7 | 'default': { 8 | 'format': '%(asctime)s - %(name)s - %(levelname)s - [%(threadName)s] - %(message)s', 9 | }, 10 | }, 11 | 'handlers': { 12 | 'console': { 13 | 'class': 'logging.StreamHandler', 14 | 'level': 'INFO', 15 | 'formatter': 'default', 16 | }, 17 | 'file': { 18 | 'class': 'logging.handlers.TimedRotatingFileHandler', 19 | 'level': 'INFO', 20 | 'formatter': 'default', 21 | 'filename': f"{os.environ.get('WORKDIR')}/logs/app.log", 22 | 'when': 'D', 23 | 'interval': 1, 24 | 'backupCount': 7, 25 | }, 26 | }, 27 | 'loggers': { 28 | '': { # root logger 29 | 'handlers': ['console', 'file'], 30 | 'level': 'INFO', 31 | 'propagate': True, 32 | }, 33 | 'apscheduler': { # Specific logger for apscheduler 34 | 'handlers': ['console', 'file'], 35 | 'level': 'ERROR', # Set to WARNING to suppress INFO and DEBUG messages 36 | 'propagate': False, # Do not propagate to root logger 37 | }, 38 | 'httpx': { # Specific logger for httpx 39 | 'handlers': ['console', 'file'], 40 | 'level': 'ERROR', # Set to WARNING to suppress INFO and DEBUG messages 41 | 'propagate': False, # Do not propagate to root logger 42 | }, 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MemAI-Flow 2 | 3 | 拓展mem.ai的内容,使它在中文生态更好用。 4 | 5 | # 介绍 6 | 7 | ## 什么是Mem.ai 8 | 9 | https://mem.ai 10 | 是OpenAI旗下基金投资的一个项目。使用Mem,你曾经拥有的每一个想法、创意和信息都可以轻松获取,有序且易于访问。使你拥有无限记忆!它使用OpenAI的模型来智能搜索或者提问,你在和模型对话时,除了模型知识,它还会优先从你记录的笔记中来回答问题。 11 | 12 | 它还有非常强大的创作能力,AI内容编辑等,建议去官网了解。 13 | 14 | ## 为什么需要MemAI-Flow 15 | 16 | Mem在海外的生态非常完善,可以轻松保存笔记、链接、Twitter(X)上的内容,还提供了Zapier连接方式,同时也有API。 17 | 18 | 但是在国内,它不太容易轻松的收藏内容,尤其是我们习惯的微信、微博、知乎等。MemAI-Flow的目的就是让MemAI在国内生态更好用,让我们可以轻松的把国内信息源看到的内容,轻松同步到Mem,让它成为我们的超级记忆!我们不会重复造轮子,只做连接器。 19 | 20 | # 功能 21 | 22 | ## 利用Cubox打通国内生态 23 | 24 | https://cubox.pro 是国内公司开发的一款很好用的一站式信息收集、阅读、管理工具,网页, iOS, iPadOS, macOS, Android, Windows, 25 | 微信全端支持。利用它可以非常容易的把微信读到的文章,看到的微博等内容,轻松剪藏。 26 | 27 | 可它也有不足,AI能力一定是没有OpenAI投资的Mem强,所以我们只需要利用它的全端剪藏能力,然后把内容同步送进Mem。这既是MemAI-Flow当前的主要功能。 28 | 29 | # 如何使用 30 | 31 | 你需要持续的运行MemAI-Flow,它才会及时自动把你剪藏的内容同步到Mem。建议部署在云服务器、NAS、树莓派等设备上,这样你可以随时随地剪藏,不用担心MemAI-Flow没有运行。 32 | ## 环境变量解释 33 | | 环境变量 | 说明 | 34 | | --- |---------------------------------------------------------------------| 35 | | MEM_API_KEY | Mem的API Key,可以在[Manage my API Keys](https://mem.ai/sources/api) 中设置 | 36 | | CUBOX_AUTH_CODE | Cubox的接口授权码,需要登录Web页面,抓包任何接口请求,从Headers中查看Authorization的值 | 37 | | CUBOX_SYNC_INTERVAL | Cubox同步间隔,单位秒,默认300秒,即5分钟,建议不要太快爬取,避免给Cubox制造太多访问压力 | 38 | 39 | ## 通过命令行运行 40 | ```bash 41 | docker run -d --restart always --name memaiflow -p 8000:8000 -e MEM_API_KEY='' -e CUBOX_AUTH_CODE='' -e CUBOX_SYNC_INTERVAL=300 yipengfei/memai-flow:latest 42 | ``` 43 | ## 通过docker-compose运行 44 | ```yaml 45 | version: "3" 46 | 47 | services: 48 | memaiflow: 49 | restart: always 50 | image: yipengfei/memai-flow:latest 51 | container_name: memaiflow 52 | ports: 53 | - 8000:8000 54 | environment: 55 | MEM_API_KEY: '' 56 | CUBOX_AUTH_CODE: '' 57 | CUBOX_SYNC_INTERVAL: 300 58 | ``` 59 | -------------------------------------------------------------------------------- /memflow/common/response.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from fastapi import status 4 | from fastapi.encoders import jsonable_encoder 5 | from fastapi.responses import JSONResponse, Response 6 | from typing import Union 7 | 8 | from starlette.responses import PlainTextResponse 9 | 10 | from memflow.common.customjsonencoder import CustomJSONEncoder 11 | 12 | 13 | def json_200(data: Union[bool, list, dict, str, None] = None, message: Union[str, None] = None) -> Response: 14 | """ 15 | 返回http_status=200的结果 16 | :param data: 返回结果 17 | :param message: 消息 18 | :return: 19 | """ 20 | if not message: 21 | message = "success" 22 | if data: 23 | if isinstance(data, list): 24 | if len(data) > 0 and 'to_dict' in dir(data[0]): 25 | data = [i.to_dict() for i in data] 26 | elif 'to_dict' in dir(data): 27 | data = data.to_dict() 28 | return PlainTextResponse( 29 | media_type="application/json", 30 | status_code=status.HTTP_200_OK, 31 | content=json.dumps({ 32 | 'success': True, 33 | 'errorCode': 0, 34 | 'message': message, 35 | 'data': data, 36 | }, cls=CustomJSONEncoder), 37 | ) 38 | 39 | 40 | def json_500(data: Union[bool, list, dict, str, None] = None, message: Union[str, None] = None) -> Response: 41 | """ 42 | 返回http_status=500的结果 43 | :param data: 返回结果 44 | :param message: 消息 45 | :return: 46 | """ 47 | if not message: 48 | message = "success" 49 | return JSONResponse( 50 | status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, 51 | content={ 52 | 'success': False, 53 | 'errorCode': 1, 54 | 'message': message, 55 | 'data': data, 56 | } 57 | ) 58 | 59 | 60 | def json_with_status(status_code: int, data: Union[bool, list, dict, str, None] = None, 61 | message: Union[str, None] = None) -> Response: 62 | """ 63 | 返回自定义statuscode的结果 64 | :param data: 返回结果 65 | :param message: 消息 66 | :return: 67 | """ 68 | if not message: 69 | message = "success" 70 | return JSONResponse( 71 | status_code=status_code, 72 | content={ 73 | 'success': False, 74 | 'errorCode': 1, 75 | 'message': message, 76 | 'data': data, 77 | } 78 | ) 79 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .vscode/ 3 | .idea/ 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | notebooks/ 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .envrc 111 | .venv 112 | .venvs 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # macOS display setting files 138 | .DS_Store 139 | 140 | # Wandb directory 141 | wandb/ 142 | 143 | # asdf tool versions 144 | .tool-versions 145 | /.ruff_cache/ 146 | 147 | *.pkl 148 | *.bin 149 | 150 | # integration test artifacts 151 | data_map* 152 | \[('_type', 'fake'), ('stop', None)] 153 | 154 | data/ -------------------------------------------------------------------------------- /memflow/utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import decimal 3 | import json 4 | from enum import Enum 5 | from typing import Dict, List, _GenericAlias, Union 6 | 7 | 8 | def _list_value(value): 9 | if isinstance(value, str): 10 | if value[0] in ['{', '[']: 11 | return json.loads(value) 12 | else: 13 | return value.split(',') 14 | else: 15 | return list(value) 16 | 17 | 18 | def _dict_value(value): 19 | if isinstance(value, str): 20 | return json.loads(value) 21 | else: 22 | return value 23 | 24 | 25 | def parse_field_value(field_value): 26 | if isinstance(field_value, decimal.Decimal): # Decimal -> float 27 | field_value = round(float(field_value), 2) 28 | elif isinstance(field_value, datetime.datetime): # datetime -> str 29 | field_value = str(field_value) 30 | elif isinstance(field_value, list): 31 | field_value = [parse_field_value(i) for i in field_value] 32 | if hasattr(field_value, 'to_json'): 33 | field_value = field_value.to_json() 34 | elif isinstance(field_value, Enum): 35 | field_value = field_value.name 36 | elif isinstance(field_value, Dict): 37 | val = {} 38 | for key_ in field_value: 39 | val[key_] = parse_field_value(field_value[key_]) 40 | field_value = val 41 | return field_value 42 | 43 | 44 | def parse_value(func, value, default_value=None): 45 | if value is not None: 46 | if func == bool: 47 | if value in (1, True, "1", "true"): 48 | return True 49 | elif value in (0, False, "0", "false"): 50 | return False 51 | else: 52 | raise ValueError(value) 53 | 54 | elif func in (int, float): 55 | try: 56 | if isinstance(value, str): 57 | value = value.replace(',', '') 58 | return func(value) 59 | except ValueError: 60 | return float('nan') 61 | elif func == datetime.datetime: 62 | if isinstance(value, datetime.datetime): 63 | return value 64 | elif isinstance(value, str): 65 | if value: 66 | return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') 67 | else: 68 | return None 69 | else: 70 | return None 71 | elif func in [Dict, dict]: 72 | return _dict_value(value) 73 | elif func in [List, list]: 74 | return _list_value(value) 75 | elif isinstance(func, _GenericAlias): 76 | if func.__origin__ in [List, list]: 77 | list_ = _list_value(value) 78 | res = [] 79 | for x in list_: 80 | res.append(parse_value(func.__args__[0], x)) 81 | return res 82 | elif func.__origin__ == Union: 83 | return parse_value(func.__args__[0], value) 84 | return func(value) 85 | else: 86 | return default_value -------------------------------------------------------------------------------- /memflow/tasks/cuboxsynctask.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import time 3 | 4 | import httpx 5 | import inject 6 | from tenacity import wait_random_exponential, retry, stop_after_attempt 7 | 8 | from memflow.exceptions import CuboxErrorException 9 | from memflow.memapi import MemApi 10 | from memflow.models import SyncRecord 11 | from trafilatura import extract 12 | from markdownify import markdownify as md 13 | 14 | CHANNEL_NAME = "cubox" 15 | INBOX_URL = "https://cubox.pro/c/api/v2/search_engine/inbox" 16 | DETAIL_URL = "https://cubox.pro/c/api/v2/bookmark/detail" 17 | _LOGGER = logging.getLogger(__name__) 18 | 19 | 20 | def extract_data_from_response(response): 21 | if response.get("code") != 200: 22 | raise CuboxErrorException( 23 | "Response error,code: %s message: %s" % (response.get("code"), response.get("message"))) 24 | return response.get("data") 25 | 26 | 27 | class CuboxSyncTask: 28 | def __init__(self, authorization: str): 29 | self.authorization = authorization 30 | 31 | @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3)) 32 | def list_inbox(self, page: int = 1, asc: bool = False, archiving: bool = False): 33 | params = { 34 | "page": page, 35 | "asc": asc, 36 | "archiving": archiving, 37 | } 38 | headers = { 39 | "authorization": self.authorization, 40 | "referer": "https://cubox.pro/my/inbox" 41 | } 42 | r = httpx.get(INBOX_URL, params=params, headers=headers) 43 | r.raise_for_status() 44 | return r.json() 45 | 46 | def get_detail(self, bookmark_id: int): 47 | params = { 48 | "bookmarkId": bookmark_id 49 | } 50 | headers = { 51 | "authorization": self.authorization, 52 | "referer": "https://cubox.pro/my/card" 53 | } 54 | r = httpx.get(DETAIL_URL, params=params, headers=headers) 55 | r.raise_for_status() 56 | return r.json() 57 | 58 | def run(self): 59 | _LOGGER.info("start sync cubox content") 60 | data = extract_data_from_response(self.list_inbox()) 61 | mem_api: MemApi = inject.instance(MemApi) 62 | for item in data: 63 | bookmark_id = item.get('userSearchEngineID') 64 | if SyncRecord.exists(CHANNEL_NAME, bookmark_id): 65 | continue 66 | time.sleep(1) 67 | _LOGGER.info(f"start sync cubox bookmark id: {bookmark_id}") 68 | detail = extract_data_from_response(self.get_detail(bookmark_id)) 69 | 70 | # 用trafilatura先提取网页中的核心内容 71 | core_html = extract(f"{detail.get('content')}", include_links=True, 72 | include_formatting=True, 73 | include_images=True, output_format='xml') 74 | # 用markdownify将html转换为带格式的markdown 75 | page_content = md(core_html) 76 | 77 | url = detail.get('targetURL') 78 | title = detail.get('title') 79 | markdown_content = f'## {title}\n\n[🔗原文链接]({url})\n\n{page_content}' 80 | r = mem_api.create_mem(markdown_content) 81 | mem_url = r.get('url') 82 | SyncRecord.insert(CHANNEL_NAME, bookmark_id, r.get('id'), mem_url) 83 | _LOGGER.info(f"create mem success, title: {title} mem_url: {mem_url}") 84 | _LOGGER.info("sync cubox content success") 85 | -------------------------------------------------------------------------------- /memflow/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | 程序启动入口类 3 | """ 4 | import os 5 | 6 | from memflow.exceptions import CuboxErrorException 7 | 8 | if not os.environ.get("WORKDIR"): 9 | workdir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data') 10 | else: 11 | workdir = os.environ.get("WORKDIR") 12 | if not os.path.exists(workdir): 13 | os.makedirs(workdir) 14 | log_dir = os.path.join(workdir, 'logs') 15 | if not os.path.exists(log_dir): 16 | os.makedirs(log_dir) 17 | os.environ["WORKDIR"] = workdir 18 | import logging.config 19 | import inject 20 | 21 | from apscheduler.schedulers.background import BackgroundScheduler 22 | from fastapi.exceptions import RequestValidationError 23 | 24 | from memflow.common.logging import LOGGING_CONFIG 25 | from memflow.memapi import MemApi 26 | 27 | logging.config.dictConfig(LOGGING_CONFIG) 28 | 29 | import httpx 30 | import uvicorn 31 | from starlette.exceptions import HTTPException 32 | from fastapi import FastAPI 33 | from memflow.databases import create_all 34 | 35 | from memflow.common.response import json_200, json_500, json_with_status 36 | from memflow.models import * 37 | 38 | scheduler = BackgroundScheduler(daemon=True) 39 | 40 | log = logging.getLogger(__name__) 41 | 42 | # 初始化ORM框架 43 | create_all() 44 | 45 | app = FastAPI() 46 | 47 | 48 | # 加载所有fastapi的接口路由 49 | 50 | @app.get("/") 51 | async def root(): 52 | """ 53 | 默认首页 54 | :return: 55 | """ 56 | return json_200(message='memflow server') 57 | 58 | 59 | @app.exception_handler(RequestValidationError) 60 | async def unprocessable_entity_handler(request, exc: RequestValidationError): 61 | return json_with_status( 62 | status_code=422, 63 | message='Parameter error', 64 | data=dict(exc.errors()) 65 | ) 66 | 67 | 68 | @app.exception_handler(HTTPException) 69 | async def http_exception_handler(request, exc): 70 | return json_with_status(status_code=exc.status_code, message=exc.detail) 71 | 72 | 73 | @app.exception_handler(httpx.HTTPStatusError) 74 | async def http_status_exception_handler(request, e: httpx.HTTPStatusError): 75 | msg = e.response.json().get('error', {}).get('message') 76 | log.error('http status exception: ' + msg, exc_info=True) 77 | return json_500(message=msg) 78 | 79 | 80 | @app.exception_handler(Exception) 81 | async def universal_exception_handler(request, exc): 82 | log.error('universal_exception_handler', exc_info=True) 83 | return json_500(message=str(exc)) 84 | 85 | 86 | def config(binder): 87 | api_key = os.environ.get("MEM_API_KEY") 88 | if not api_key: 89 | raise CuboxErrorException("MEM_API_KEY not found, please set it in env") 90 | mem = MemApi(api_key) 91 | binder.bind(MemApi, mem) 92 | 93 | 94 | def startup(): 95 | inject.configure(config) 96 | from memflow.tasks.cuboxsynctask import CuboxSyncTask 97 | auth_code = os.environ.get("CUBOX_AUTH_CODE") 98 | if not auth_code: 99 | raise CuboxErrorException("CUBOX_AUTH_CODE not found, please set it in env") 100 | interval_secs = int(os.environ.get('CUBOX_SYNC_INTERVAL', 300)) 101 | scheduler.add_job(CuboxSyncTask(auth_code).run, 'interval', 102 | seconds=interval_secs) 103 | log.info("add job cubox sync task, interval: %s seconds" % interval_secs) 104 | scheduler.start() 105 | 106 | 107 | if __name__ == "__main__": 108 | startup() 109 | uvicorn.run(app, host="0.0.0.0", port=os.environ.get("WEB_PORT", 8000)) 110 | -------------------------------------------------------------------------------- /memflow/databases.py: -------------------------------------------------------------------------------- 1 | """ 2 | 与数据库有关的操作类 3 | """ 4 | import datetime 5 | import os 6 | 7 | from dataclasses_json import dataclass_json 8 | from sqlalchemy import create_engine, Column, DateTime, String, Integer, Text, select 9 | from sqlalchemy.ext.declarative import declarative_base 10 | from sqlalchemy.orm import Session 11 | 12 | from memflow import utils 13 | 14 | # WORKDIR环境变量文件夹内的db目录,为数据库文件存放目录 15 | db_path = os.path.join(os.environ.get('WORKDIR', os.path.dirname(os.path.abspath(__file__))), 'db') 16 | if not os.path.exists(db_path): 17 | os.makedirs(db_path) 18 | engine = create_engine( 19 | f'sqlite:////{db_path}/main.db?check_same_thread=False&timeout=60' 20 | ) 21 | Base = declarative_base() 22 | 23 | 24 | def create_all(): 25 | """ 26 | 自动初始化数据库引擎和ORM框架 27 | 会自动生成模型定义的结构为数据表 28 | :return: 29 | """ 30 | Base.metadata.create_all(engine) 31 | 32 | 33 | class BaseDBModel(Base): 34 | """ 35 | 数据表基类,每张表的模型类继承此类 36 | """ 37 | __abstract__ = True 38 | __table_args__ = {'extend_existing': True} 39 | created_at = Column(DateTime, nullable=False, default=datetime.datetime.now) 40 | updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now, onupdate=datetime.datetime.now) 41 | 42 | def get_columns(self): 43 | """ 44 | 返回所有字段对象 45 | :return: 46 | """ 47 | return self.__table__.columns 48 | 49 | @classmethod 50 | def query(cls): 51 | session = Session(bind=engine) 52 | return session.query(cls) 53 | 54 | def get_fields(self): 55 | """ 56 | 返回所有字段 57 | :return: 58 | """ 59 | return self.__dict__ 60 | 61 | def save(self): 62 | """ 63 | 新增 64 | :return: 65 | """ 66 | session = Session(bind=engine) 67 | try: 68 | session.add(self) 69 | session.commit() 70 | except BaseException as e: 71 | session.rollback() 72 | raise 73 | 74 | def update(self): 75 | """ 76 | 新增 77 | :return: 78 | """ 79 | session = Session(bind=engine) 80 | try: 81 | self.updated_at = datetime.datetime.now() 82 | session.merge(self) 83 | session.commit() 84 | except: 85 | session.rollback() 86 | raise 87 | 88 | @staticmethod 89 | def save_all(model_list): 90 | """ 91 | 批量新增 92 | :param model_list: 93 | :return: 94 | """ 95 | session = Session(bind=engine) 96 | try: 97 | session.add_all(model_list) 98 | session.commit() 99 | except: 100 | session.rollback() 101 | raise 102 | 103 | def delete(self): 104 | session = Session(bind=engine) 105 | try: 106 | session.commit() 107 | except: 108 | session.rollback() 109 | raise 110 | 111 | def to_dict(self, hidden_fields=None): 112 | """ 113 | Json序列化 114 | :param hidden_fields: 覆盖类属性 hidden_fields 115 | :return: 116 | """ 117 | model_json = {} 118 | if not hidden_fields: 119 | hidden_fields = self.__hidden_fields__ 120 | if not hidden_fields: 121 | hidden_fields = [] 122 | for column in self.__dict__: 123 | if column in hidden_fields: 124 | continue 125 | if hasattr(self, column): 126 | model_json[column] = utils.parse_field_value(getattr(self, column)) 127 | if '_sa_instance_state' in model_json: 128 | del model_json['_sa_instance_state'] 129 | return model_json 130 | --------------------------------------------------------------------------------