├── memflow
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── customjsonencoder.py
    │   ├── logging.py
    │   └── response.py
    ├── tasks
    │   ├── __init__.py
    │   └── cuboxsynctask.py
    ├── models
    │   ├── __init__.py
    │   └── syncrecord.py
    ├── exceptions.py
    ├── memapi.py
    ├── utils.py
    ├── main.py
    └── databases.py
├── requirements.txt
├── app
    ├── conf
    │   └── supervisord.conf
    └── start.sh
├── docker-compose.yml
├── Dockerfile
├── LICENSE
├── README.md
└── .gitignore


/memflow/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/memflow/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/memflow/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/memflow/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .syncrecord import SyncRecord
2 | 


--------------------------------------------------------------------------------
/memflow/exceptions.py:
--------------------------------------------------------------------------------
1 | class CuboxErrorException(RuntimeError):
2 |     def __init__(self, message):
3 |         self.message = message
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | uvicorn[standard]
 2 | fastapi==0.104.1
 3 | tenacity==8.2.3
 4 | httpx==0.25.1
 5 | dataclasses-json==0.6.1
 6 | SQLAlchemy==2.0.23
 7 | trafilatura==1.6.2
 8 | inject==5.1.0
 9 | APScheduler==3.10.4
10 | trafilatura==1.6.2
11 | markdownify==0.11.6


--------------------------------------------------------------------------------
/app/conf/supervisord.conf:
--------------------------------------------------------------------------------
 1 | [supervisord]
 2 | nodaemon=true
 3 | 
 4 | [program:memflow]
 5 | user = memflow
 6 | directory = /app
 7 | command = python memflow/main.py
 8 | startsecs=3
 9 | autostart = true
10 | autorestart = true
11 | redirect_stderr=true
12 | stdout_logfile=/dev/fd/1
13 | stdout_logfile_maxbytes=0


--------------------------------------------------------------------------------
/memflow/common/customjsonencoder.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import datetime
 3 | 
 4 | 
 5 | class CustomJSONEncoder(json.JSONEncoder):
 6 |     def default(self, obj):
 7 |         if isinstance(obj, datetime.datetime):
 8 |             return obj.strftime("%Y-%m-%d %H:%M:%S")
 9 |         return super().default(obj)
10 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | 
 3 | services:
 4 |   memaiflow:
 5 |     restart: always
 6 |     image: yipengfei/memai-flow:latest
 7 |     container_name: memaiflow
 8 |     ports:
 9 |       - 8000:8000
10 |     environment:
11 |       MEM_API_KEY: ''
12 |       CUBOX_AUTH_CODE: ''
13 |       CUBOX_SYNC_INTERVAL: 300


--------------------------------------------------------------------------------
/app/start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | user=memflow
 3 | PUID=${PUID:-0}
 4 | PGID=${PGID:-0}
 5 | #create user if not exists
 6 | if id -u $user >/dev/null 2>&1 ;then
 7 |   echo "$user exists"
 8 | else
 9 |   echo "create $user(${PUID}): $user(${PGID})"
10 |   useradd -U -d /data -s /bin/false $user
11 |   usermod -G users $user
12 |   groupmod -o -g "$PGID" $user
13 |   usermod -o -u "$PUID" $user
14 | fi
15 | 
16 | base_path='/app'
17 | 
18 | chown -R $user:$user /app
19 | chown -R $user:$user /data
20 | 
21 | supervisord -c /app/conf/supervisord.conf
22 | 


--------------------------------------------------------------------------------
/memflow/memapi.py:
--------------------------------------------------------------------------------
 1 | import httpx
 2 | from tenacity import retry, wait_random_exponential, stop_after_attempt
 3 | 
 4 | CREATE_MEM_API = "https://api.mem.ai/v0/mems"
 5 | 
 6 | 
 7 | class MemApi:
 8 |     def __init__(self, api_key: str):
 9 |         self.api_key = api_key
10 |         self.headers = {
11 |             "Authorization": "ApiAccessToken " + self.api_key,
12 |         }
13 | 
14 |     @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3))
15 |     def create_mem(self, content: str):
16 |         params = {
17 |             "content": content
18 |         }
19 |         r = httpx.post(CREATE_MEM_API, json=params, headers=self.headers)
20 |         r.raise_for_status()
21 |         return r.json()
22 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.9.1-slim-buster
 2 | VOLUME /data
 3 | EXPOSE 8000
 4 | ENV TZ=Asia/Shanghai \
 5 |     PYTHONPATH="${PYTHONPATH}:/dependencies:/app" \
 6 |     WORKDIR='/data' \
 7 |     MEM_API_KEY='' \
 8 |     CUBOX_AUTH_CODE='' \
 9 |     CUBOX_SYNC_INTERVAL=300
10 | 
11 | COPY app /app
12 | COPY memflow /app/memflow
13 | COPY requirements.txt /
14 | 
15 | RUN apt-get -y update \
16 | 	&& apt-get install -y --no-install-recommends tzdata \
17 |     && python -m pip install --upgrade pip \
18 |     && pip install supervisor \
19 |     && pip install --target=/dependencies -r requirements.txt \
20 | 	&& ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime  \
21 | 	&& echo ${TZ} > /etc/timezone \
22 |     && rm -rf /var/lib/apt/lists/*
23 | 
24 | WORKDIR /app
25 | CMD sh /app/start.sh
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 yipengfei
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/memflow/models/syncrecord.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, List
 2 | 
 3 | from memflow.databases import *
 4 | 
 5 | 
 6 | class SyncRecord(BaseDBModel):
 7 |     """
 8 |     存储站点用的一些Cookie
 9 |     """
10 |     __tablename__ = 'cookie_store'
11 | 
12 |     id = Column(Integer, primary_key=True, autoincrement=True, comment='id')
13 |     channel = Column(String, comment='内容来源通道', nullable=False)
14 |     content_id = Column(String, comment='内容的唯一编号，用于查询重复', nullable=False)
15 |     mem_id = Column(String, comment='写入mem后的唯一编号', nullable=False)
16 |     mem_url = Column(String, comment='写入mem后获得的访问链接', nullable=False)
17 | 
18 |     @staticmethod
19 |     def exists(channel: str, content_id: str) -> bool:
20 |         return SyncRecord.query().filter(
21 |             (SyncRecord.channel == channel) & (SyncRecord.content_id == content_id)).first() is not None
22 | 
23 |     @staticmethod
24 |     def insert(channel: str, content_id: str, mem_id: str, mem_url: str):
25 |         record = SyncRecord()
26 |         record.channel = channel
27 |         record.content_id = content_id
28 |         record.mem_id = mem_id
29 |         record.mem_url = mem_url
30 |         record.save()
31 | 


--------------------------------------------------------------------------------
/memflow/common/logging.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | LOGGING_CONFIG = {
 4 |     'version': 1,
 5 |     'disable_existing_loggers': False,
 6 |     'formatters': {
 7 |         'default': {
 8 |             'format': '%(asctime)s - %(name)s - %(levelname)s - [%(threadName)s] - %(message)s',
 9 |         },
10 |     },
11 |     'handlers': {
12 |         'console': {
13 |             'class': 'logging.StreamHandler',
14 |             'level': 'INFO',
15 |             'formatter': 'default',
16 |         },
17 |         'file': {
18 |             'class': 'logging.handlers.TimedRotatingFileHandler',
19 |             'level': 'INFO',
20 |             'formatter': 'default',
21 |             'filename': f"{os.environ.get('WORKDIR')}/logs/app.log",
22 |             'when': 'D',
23 |             'interval': 1,
24 |             'backupCount': 7,
25 |         },
26 |     },
27 |     'loggers': {
28 |         '': {  # root logger
29 |             'handlers': ['console', 'file'],
30 |             'level': 'INFO',
31 |             'propagate': True,
32 |         },
33 |         'apscheduler': {  # Specific logger for apscheduler
34 |             'handlers': ['console', 'file'],
35 |             'level': 'ERROR',  # Set to WARNING to suppress INFO and DEBUG messages
36 |             'propagate': False,  # Do not propagate to root logger
37 |         },
38 |         'httpx': {  # Specific logger for httpx
39 |             'handlers': ['console', 'file'],
40 |             'level': 'ERROR',  # Set to WARNING to suppress INFO and DEBUG messages
41 |             'propagate': False,  # Do not propagate to root logger
42 |         },
43 |     }
44 | }
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MemAI-Flow
 2 | 
 3 | 拓展mem.ai的内容，使它在中文生态更好用。
 4 | 
 5 | # 介绍
 6 | 
 7 | ## 什么是Mem.ai
 8 | 
 9 | https://mem.ai
10 | 是OpenAI旗下基金投资的一个项目。使用Mem，你曾经拥有的每一个想法、创意和信息都可以轻松获取，有序且易于访问。使你拥有无限记忆！它使用OpenAI的模型来智能搜索或者提问，你在和模型对话时，除了模型知识，它还会优先从你记录的笔记中来回答问题。
11 | 
12 | 它还有非常强大的创作能力，AI内容编辑等，建议去官网了解。
13 | 
14 | ## 为什么需要MemAI-Flow
15 | 
16 | Mem在海外的生态非常完善，可以轻松保存笔记、链接、Twitter(X)上的内容，还提供了Zapier连接方式，同时也有API。
17 | 
18 | 但是在国内，它不太容易轻松的收藏内容，尤其是我们习惯的微信、微博、知乎等。MemAI-Flow的目的就是让MemAI在国内生态更好用，让我们可以轻松的把国内信息源看到的内容，轻松同步到Mem，让它成为我们的超级记忆！我们不会重复造轮子，只做连接器。
19 | 
20 | # 功能
21 | 
22 | ## 利用Cubox打通国内生态
23 | 
24 | https://cubox.pro 是国内公司开发的一款很好用的一站式信息收集、阅读、管理工具，网页, iOS, iPadOS, macOS, Android, Windows,
25 | 微信全端支持。利用它可以非常容易的把微信读到的文章，看到的微博等内容，轻松剪藏。
26 | 
27 | 可它也有不足，AI能力一定是没有OpenAI投资的Mem强，所以我们只需要利用它的全端剪藏能力，然后把内容同步送进Mem。这既是MemAI-Flow当前的主要功能。
28 | 
29 | # 如何使用
30 | 
31 | 你需要持续的运行MemAI-Flow，它才会及时自动把你剪藏的内容同步到Mem。建议部署在云服务器、NAS、树莓派等设备上，这样你可以随时随地剪藏，不用担心MemAI-Flow没有运行。
32 | ## 环境变量解释
33 | | 环境变量 | 说明                                                                  |
34 | | --- |---------------------------------------------------------------------|
35 | | MEM_API_KEY | Mem的API Key，可以在[Manage my API Keys](https://mem.ai/sources/api) 中设置 |
36 | | CUBOX_AUTH_CODE | Cubox的接口授权码，需要登录Web页面，抓包任何接口请求，从Headers中查看Authorization的值           |
37 | | CUBOX_SYNC_INTERVAL | Cubox同步间隔，单位秒，默认300秒，即5分钟，建议不要太快爬取，避免给Cubox制造太多访问压力                 |
38 | 
39 | ## 通过命令行运行
40 | ```bash
41 | docker run -d --restart always --name memaiflow -p 8000:8000 -e MEM_API_KEY='' -e CUBOX_AUTH_CODE='' -e CUBOX_SYNC_INTERVAL=300 yipengfei/memai-flow:latest
42 | ```
43 | ## 通过docker-compose运行
44 | ```yaml
45 | version: "3"
46 | 
47 | services:
48 |   memaiflow:
49 |     restart: always
50 |     image: yipengfei/memai-flow:latest
51 |     container_name: memaiflow
52 |     ports:
53 |       - 8000:8000
54 |     environment:
55 |       MEM_API_KEY: ''
56 |       CUBOX_AUTH_CODE: ''
57 |       CUBOX_SYNC_INTERVAL: 300
58 | ```
59 | 


--------------------------------------------------------------------------------
/memflow/common/response.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from fastapi import status
 4 | from fastapi.encoders import jsonable_encoder
 5 | from fastapi.responses import JSONResponse, Response
 6 | from typing import Union
 7 | 
 8 | from starlette.responses import PlainTextResponse
 9 | 
10 | from memflow.common.customjsonencoder import CustomJSONEncoder
11 | 
12 | 
13 | def json_200(data: Union[bool, list, dict, str, None] = None, message: Union[str, None] = None) -> Response:
14 |     """
15 |     返回http_status=200的结果
16 |     :param data: 返回结果
17 |     :param message: 消息
18 |     :return:
19 |     """
20 |     if not message:
21 |         message = "success"
22 |     if data:
23 |         if isinstance(data, list):
24 |             if len(data) > 0 and 'to_dict' in dir(data[0]):
25 |                 data = [i.to_dict() for i in data]
26 |         elif 'to_dict' in dir(data):
27 |             data = data.to_dict()
28 |     return PlainTextResponse(
29 |         media_type="application/json",
30 |         status_code=status.HTTP_200_OK,
31 |         content=json.dumps({
32 |             'success': True,
33 |             'errorCode': 0,
34 |             'message': message,
35 |             'data': data,
36 |         }, cls=CustomJSONEncoder),
37 |     )
38 | 
39 | 
40 | def json_500(data: Union[bool, list, dict, str, None] = None, message: Union[str, None] = None) -> Response:
41 |     """
42 |     返回http_status=500的结果
43 |     :param data: 返回结果
44 |     :param message: 消息
45 |     :return:
46 |     """
47 |     if not message:
48 |         message = "success"
49 |     return JSONResponse(
50 |         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
51 |         content={
52 |             'success': False,
53 |             'errorCode': 1,
54 |             'message': message,
55 |             'data': data,
56 |         }
57 |     )
58 | 
59 | 
60 | def json_with_status(status_code: int, data: Union[bool, list, dict, str, None] = None,
61 |                      message: Union[str, None] = None) -> Response:
62 |     """
63 |     返回自定义statuscode的结果
64 |     :param data: 返回结果
65 |     :param message: 消息
66 |     :return:
67 |     """
68 |     if not message:
69 |         message = "success"
70 |     return JSONResponse(
71 |         status_code=status_code,
72 |         content={
73 |             'success': False,
74 |             'errorCode': 1,
75 |             'message': message,
76 |             'data': data,
77 |         }
78 |     )
79 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .vs/
  2 | .vscode/
  3 | .idea/
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | notebooks/
 83 | 
 84 | # IPython
 85 | profile_default/
 86 | ipython_config.py
 87 | 
 88 | # pyenv
 89 | .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .envrc
111 | .venv
112 | .venvs
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 | 
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 | 
123 | # Rope project settings
124 | .ropeproject
125 | 
126 | # mkdocs documentation
127 | /site
128 | 
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 | 
134 | # Pyre type checker
135 | .pyre/
136 | 
137 | # macOS display setting files
138 | .DS_Store
139 | 
140 | # Wandb directory
141 | wandb/
142 | 
143 | # asdf tool versions
144 | .tool-versions
145 | /.ruff_cache/
146 | 
147 | *.pkl
148 | *.bin
149 | 
150 | # integration test artifacts
151 | data_map*
152 | \[('_type', 'fake'), ('stop', None)]
153 | 
154 | data/


--------------------------------------------------------------------------------
/memflow/utils.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import decimal
 3 | import json
 4 | from enum import Enum
 5 | from typing import Dict, List, _GenericAlias, Union
 6 | 
 7 | 
 8 | def _list_value(value):
 9 |     if isinstance(value, str):
10 |         if value[0] in ['{', '[']:
11 |             return json.loads(value)
12 |         else:
13 |             return value.split(',')
14 |     else:
15 |         return list(value)
16 | 
17 | 
18 | def _dict_value(value):
19 |     if isinstance(value, str):
20 |         return json.loads(value)
21 |     else:
22 |         return value
23 | 
24 | 
25 | def parse_field_value(field_value):
26 |     if isinstance(field_value, decimal.Decimal):  # Decimal -> float
27 |         field_value = round(float(field_value), 2)
28 |     elif isinstance(field_value, datetime.datetime):  # datetime -> str
29 |         field_value = str(field_value)
30 |     elif isinstance(field_value, list):
31 |         field_value = [parse_field_value(i) for i in field_value]
32 |     if hasattr(field_value, 'to_json'):
33 |         field_value = field_value.to_json()
34 |     elif isinstance(field_value, Enum):
35 |         field_value = field_value.name
36 |     elif isinstance(field_value, Dict):
37 |         val = {}
38 |         for key_ in field_value:
39 |             val[key_] = parse_field_value(field_value[key_])
40 |         field_value = val
41 |     return field_value
42 | 
43 | 
44 | def parse_value(func, value, default_value=None):
45 |     if value is not None:
46 |         if func == bool:
47 |             if value in (1, True, "1", "true"):
48 |                 return True
49 |             elif value in (0, False, "0", "false"):
50 |                 return False
51 |             else:
52 |                 raise ValueError(value)
53 | 
54 |         elif func in (int, float):
55 |             try:
56 |                 if isinstance(value, str):
57 |                     value = value.replace(',', '')
58 |                 return func(value)
59 |             except ValueError:
60 |                 return float('nan')
61 |         elif func == datetime.datetime:
62 |             if isinstance(value, datetime.datetime):
63 |                 return value
64 |             elif isinstance(value, str):
65 |                 if value:
66 |                     return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
67 |                 else:
68 |                     return None
69 |             else:
70 |                 return None
71 |         elif func in [Dict, dict]:
72 |             return _dict_value(value)
73 |         elif func in [List, list]:
74 |             return _list_value(value)
75 |         elif isinstance(func, _GenericAlias):
76 |             if func.__origin__ in [List, list]:
77 |                 list_ = _list_value(value)
78 |                 res = []
79 |                 for x in list_:
80 |                     res.append(parse_value(func.__args__[0], x))
81 |                 return res
82 |             elif func.__origin__ == Union:
83 |                 return parse_value(func.__args__[0], value)
84 |         return func(value)
85 |     else:
86 |         return default_value


--------------------------------------------------------------------------------
/memflow/tasks/cuboxsynctask.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | 
 4 | import httpx
 5 | import inject
 6 | from tenacity import wait_random_exponential, retry, stop_after_attempt
 7 | 
 8 | from memflow.exceptions import CuboxErrorException
 9 | from memflow.memapi import MemApi
10 | from memflow.models import SyncRecord
11 | from trafilatura import extract
12 | from markdownify import markdownify as md
13 | 
14 | CHANNEL_NAME = "cubox"
15 | INBOX_URL = "https://cubox.pro/c/api/v2/search_engine/inbox"
16 | DETAIL_URL = "https://cubox.pro/c/api/v2/bookmark/detail"
17 | _LOGGER = logging.getLogger(__name__)
18 | 
19 | 
20 | def extract_data_from_response(response):
21 |     if response.get("code") != 200:
22 |         raise CuboxErrorException(
23 |             "Response error,code: %s message: %s" % (response.get("code"), response.get("message")))
24 |     return response.get("data")
25 | 
26 | 
27 | class CuboxSyncTask:
28 |     def __init__(self, authorization: str):
29 |         self.authorization = authorization
30 | 
31 |     @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3))
32 |     def list_inbox(self, page: int = 1, asc: bool = False, archiving: bool = False):
33 |         params = {
34 |             "page": page,
35 |             "asc": asc,
36 |             "archiving": archiving,
37 |         }
38 |         headers = {
39 |             "authorization": self.authorization,
40 |             "referer": "https://cubox.pro/my/inbox"
41 |         }
42 |         r = httpx.get(INBOX_URL, params=params, headers=headers)
43 |         r.raise_for_status()
44 |         return r.json()
45 | 
46 |     def get_detail(self, bookmark_id: int):
47 |         params = {
48 |             "bookmarkId": bookmark_id
49 |         }
50 |         headers = {
51 |             "authorization": self.authorization,
52 |             "referer": "https://cubox.pro/my/card"
53 |         }
54 |         r = httpx.get(DETAIL_URL, params=params, headers=headers)
55 |         r.raise_for_status()
56 |         return r.json()
57 | 
58 |     def run(self):
59 |         _LOGGER.info("start sync cubox content")
60 |         data = extract_data_from_response(self.list_inbox())
61 |         mem_api: MemApi = inject.instance(MemApi)
62 |         for item in data:
63 |             bookmark_id = item.get('userSearchEngineID')
64 |             if SyncRecord.exists(CHANNEL_NAME, bookmark_id):
65 |                 continue
66 |             time.sleep(1)
67 |             _LOGGER.info(f"start sync cubox bookmark id: {bookmark_id}")
68 |             detail = extract_data_from_response(self.get_detail(bookmark_id))
69 | 
70 |             # 用trafilatura先提取网页中的核心内容
71 |             core_html = extract(f"<html>{detail.get('content')}</html>", include_links=True,
72 |                                 include_formatting=True,
73 |                                 include_images=True, output_format='xml')
74 |             # 用markdownify将html转换为带格式的markdown
75 |             page_content = md(core_html)
76 | 
77 |             url = detail.get('targetURL')
78 |             title = detail.get('title')
79 |             markdown_content = f'## {title}\n\n[🔗原文链接]({url})\n\n{page_content}'
80 |             r = mem_api.create_mem(markdown_content)
81 |             mem_url = r.get('url')
82 |             SyncRecord.insert(CHANNEL_NAME, bookmark_id, r.get('id'), mem_url)
83 |             _LOGGER.info(f"create mem success, title: {title} mem_url: {mem_url}")
84 |         _LOGGER.info("sync cubox content success")
85 | 


--------------------------------------------------------------------------------
/memflow/main.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 程序启动入口类
  3 | """
  4 | import os
  5 | 
  6 | from memflow.exceptions import CuboxErrorException
  7 | 
  8 | if not os.environ.get("WORKDIR"):
  9 |     workdir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
 10 | else:
 11 |     workdir = os.environ.get("WORKDIR")
 12 | if not os.path.exists(workdir):
 13 |     os.makedirs(workdir)
 14 | log_dir = os.path.join(workdir, 'logs')
 15 | if not os.path.exists(log_dir):
 16 |     os.makedirs(log_dir)
 17 | os.environ["WORKDIR"] = workdir
 18 | import logging.config
 19 | import inject
 20 | 
 21 | from apscheduler.schedulers.background import BackgroundScheduler
 22 | from fastapi.exceptions import RequestValidationError
 23 | 
 24 | from memflow.common.logging import LOGGING_CONFIG
 25 | from memflow.memapi import MemApi
 26 | 
 27 | logging.config.dictConfig(LOGGING_CONFIG)
 28 | 
 29 | import httpx
 30 | import uvicorn
 31 | from starlette.exceptions import HTTPException
 32 | from fastapi import FastAPI
 33 | from memflow.databases import create_all
 34 | 
 35 | from memflow.common.response import json_200, json_500, json_with_status
 36 | from memflow.models import *
 37 | 
 38 | scheduler = BackgroundScheduler(daemon=True)
 39 | 
 40 | log = logging.getLogger(__name__)
 41 | 
 42 | # 初始化ORM框架
 43 | create_all()
 44 | 
 45 | app = FastAPI()
 46 | 
 47 | 
 48 | # 加载所有fastapi的接口路由
 49 | 
 50 | @app.get("/")
 51 | async def root():
 52 |     """
 53 |     默认首页
 54 |     :return:
 55 |     """
 56 |     return json_200(message='memflow server')
 57 | 
 58 | 
 59 | @app.exception_handler(RequestValidationError)
 60 | async def unprocessable_entity_handler(request, exc: RequestValidationError):
 61 |     return json_with_status(
 62 |         status_code=422,
 63 |         message='Parameter error',
 64 |         data=dict(exc.errors())
 65 |     )
 66 | 
 67 | 
 68 | @app.exception_handler(HTTPException)
 69 | async def http_exception_handler(request, exc):
 70 |     return json_with_status(status_code=exc.status_code, message=exc.detail)
 71 | 
 72 | 
 73 | @app.exception_handler(httpx.HTTPStatusError)
 74 | async def http_status_exception_handler(request, e: httpx.HTTPStatusError):
 75 |     msg = e.response.json().get('error', {}).get('message')
 76 |     log.error('http status exception: ' + msg, exc_info=True)
 77 |     return json_500(message=msg)
 78 | 
 79 | 
 80 | @app.exception_handler(Exception)
 81 | async def universal_exception_handler(request, exc):
 82 |     log.error('universal_exception_handler', exc_info=True)
 83 |     return json_500(message=str(exc))
 84 | 
 85 | 
 86 | def config(binder):
 87 |     api_key = os.environ.get("MEM_API_KEY")
 88 |     if not api_key:
 89 |         raise CuboxErrorException("MEM_API_KEY not found, please set it in env")
 90 |     mem = MemApi(api_key)
 91 |     binder.bind(MemApi, mem)
 92 | 
 93 | 
 94 | def startup():
 95 |     inject.configure(config)
 96 |     from memflow.tasks.cuboxsynctask import CuboxSyncTask
 97 |     auth_code = os.environ.get("CUBOX_AUTH_CODE")
 98 |     if not auth_code:
 99 |         raise CuboxErrorException("CUBOX_AUTH_CODE not found, please set it in env")
100 |     interval_secs = int(os.environ.get('CUBOX_SYNC_INTERVAL', 300))
101 |     scheduler.add_job(CuboxSyncTask(auth_code).run, 'interval',
102 |                       seconds=interval_secs)
103 |     log.info("add job cubox sync task, interval: %s seconds" % interval_secs)
104 |     scheduler.start()
105 | 
106 | 
107 | if __name__ == "__main__":
108 |     startup()
109 |     uvicorn.run(app, host="0.0.0.0", port=os.environ.get("WEB_PORT", 8000))
110 | 


--------------------------------------------------------------------------------
/memflow/databases.py:
--------------------------------------------------------------------------------
  1 | """
  2 | 与数据库有关的操作类
  3 | """
  4 | import datetime
  5 | import os
  6 | 
  7 | from dataclasses_json import dataclass_json
  8 | from sqlalchemy import create_engine, Column, DateTime, String, Integer, Text, select
  9 | from sqlalchemy.ext.declarative import declarative_base
 10 | from sqlalchemy.orm import Session
 11 | 
 12 | from memflow import utils
 13 | 
 14 | # WORKDIR环境变量文件夹内的db目录，为数据库文件存放目录
 15 | db_path = os.path.join(os.environ.get('WORKDIR', os.path.dirname(os.path.abspath(__file__))), 'db')
 16 | if not os.path.exists(db_path):
 17 |     os.makedirs(db_path)
 18 | engine = create_engine(
 19 |     f'sqlite:////{db_path}/main.db?check_same_thread=False&timeout=60'
 20 | )
 21 | Base = declarative_base()
 22 | 
 23 | 
 24 | def create_all():
 25 |     """
 26 |     自动初始化数据库引擎和ORM框架
 27 |     会自动生成模型定义的结构为数据表
 28 |     :return:
 29 |     """
 30 |     Base.metadata.create_all(engine)
 31 | 
 32 | 
 33 | class BaseDBModel(Base):
 34 |     """
 35 |     数据表基类，每张表的模型类继承此类
 36 |     """
 37 |     __abstract__ = True
 38 |     __table_args__ = {'extend_existing': True}
 39 |     created_at = Column(DateTime, nullable=False, default=datetime.datetime.now)
 40 |     updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now, onupdate=datetime.datetime.now)
 41 | 
 42 |     def get_columns(self):
 43 |         """
 44 |         返回所有字段对象
 45 |         :return:
 46 |         """
 47 |         return self.__table__.columns
 48 | 
 49 |     @classmethod
 50 |     def query(cls):
 51 |         session = Session(bind=engine)
 52 |         return session.query(cls)
 53 | 
 54 |     def get_fields(self):
 55 |         """
 56 |         返回所有字段
 57 |         :return:
 58 |         """
 59 |         return self.__dict__
 60 | 
 61 |     def save(self):
 62 |         """
 63 |         新增
 64 |         :return:
 65 |         """
 66 |         session = Session(bind=engine)
 67 |         try:
 68 |             session.add(self)
 69 |             session.commit()
 70 |         except BaseException as e:
 71 |             session.rollback()
 72 |             raise
 73 | 
 74 |     def update(self):
 75 |         """
 76 |         新增
 77 |         :return:
 78 |         """
 79 |         session = Session(bind=engine)
 80 |         try:
 81 |             self.updated_at = datetime.datetime.now()
 82 |             session.merge(self)
 83 |             session.commit()
 84 |         except:
 85 |             session.rollback()
 86 |             raise
 87 | 
 88 |     @staticmethod
 89 |     def save_all(model_list):
 90 |         """
 91 |         批量新增
 92 |         :param model_list:
 93 |         :return:
 94 |         """
 95 |         session = Session(bind=engine)
 96 |         try:
 97 |             session.add_all(model_list)
 98 |             session.commit()
 99 |         except:
100 |             session.rollback()
101 |             raise
102 | 
103 |     def delete(self):
104 |         session = Session(bind=engine)
105 |         try:
106 |             session.commit()
107 |         except:
108 |             session.rollback()
109 |             raise
110 | 
111 |     def to_dict(self, hidden_fields=None):
112 |         """
113 |         Json序列化
114 |         :param hidden_fields: 覆盖类属性 hidden_fields
115 |         :return:
116 |         """
117 |         model_json = {}
118 |         if not hidden_fields:
119 |             hidden_fields = self.__hidden_fields__
120 |         if not hidden_fields:
121 |             hidden_fields = []
122 |         for column in self.__dict__:
123 |             if column in hidden_fields:
124 |                 continue
125 |             if hasattr(self, column):
126 |                 model_json[column] = utils.parse_field_value(getattr(self, column))
127 |         if '_sa_instance_state' in model_json:
128 |             del model_json['_sa_instance_state']
129 |         return model_json
130 | 


--------------------------------------------------------------------------------