├── show_groupby.sh
├── tools
    ├── task.py
    ├── watch_dog.sh
    ├── disk_alert.py
    ├── delete_dup.py
    └── pycrontab.py
├── main.py
├── conf.py
├── rt.py
├── .gitignore
├── README.md
└── his.py


/show_groupby.sh:
--------------------------------------------------------------------------------
# Summarise disk usage per file extension below a directory.
#
# Usage: show_groupby.sh [DIR]
#   DIR defaults to the current directory when omitted (an unquoted, empty
#   "$1" would make a bare `cd` jump to $HOME instead).
#
# find prints "<512-byte blocks>.<file name>" NUL-terminated for every regular
# file whose name contains a dot; awk splits on "." so $1 is the block count
# and $NF the extension, then sums blocks and counts files per extension.
# Output columns: total size (human readable), number of files, extension.
cd -- "${1:-.}" && find . -name '?*.*' -type f -printf '%b.%f\0' |
    awk -F . -v RS='\0' '
        {s[$NF] += $1; n[$NF]++}
        END {for (e in s) printf "%15d %4d %s\n", s[e]*512, n[e], e}' |
    sort -n | numfmt --to=iec-i --suffix=B
6 | 


--------------------------------------------------------------------------------
/tools/task.py:
--------------------------------------------------------------------------------
 1 | from pycrontab import crontab, crontab_run
 2 | print("job init success")
 3 | script2 = '/volumeUSB1/usbshare/py/delete_dup.py'
 4 | crontab.every('hour').interval(3).execute(script2," -a")
 5 | # 全路径
 6 | script1 = '/volumeUSB1/usbshare/py/disk_alert.py'
 7 | crontab.every('minute').interval(30).execute(script1,None)
 8 | 
 9 | crontab_run(debug=True)
10 | 


--------------------------------------------------------------------------------
/tools/watch_dog.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | basepath=$(cd `dirname $0`; pwd)
 4 | 
 5 | cd $basepath
 6 | 
 7 | cleanup (){
 8 | echo "kill main.py task.py"
 9 | ps -ef | grep -E 'main.py|task.py'| awk '{print$2}' | xargs kill -9
10 | exit 0
11 | 
12 | }
13 | # 监控信号量
14 | trap cleanup SIGINT SIGTERM
15 | 
16 | #启动 task
17 | nohup python task.py  2>&1 > /dev/null &
18 | 
19 | # 循环监控
20 | for ((;;)) do
21 | status=$(ps -ef | grep main.py | grep -v 'grep' |grep -v 'du*' | wc -l);
22 | #echo $status;
23 | if [ $status -eq 0 ]
24 | then
25 | echo "nohup python main.py &"
26 | nohup python main.py &
27 | sleep 15 ;
28 | else
29 | echo "get media is running"
30 | fi
31 | 
32 | sleep 3 ;
33 | done


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | from telethon import TelegramClient, sync
 2 | import random
 3 | import time
 4 | import socks
 5 | from multiprocessing import Process ,cpu_count
 6 | import os
 7 | from rt import tg_watchon_class
 8 | from conf import config
 9 | import logging
10 | 
11 | # 下载 history 不是实时监听 实时监听在 `rt`
12 | 
def get_media(channel_username, client, cfg=None, limit=1000):
    """Download the media attached to a channel's recent history.

    This is the history downloader; real-time listening lives in ``rt``.

    Args:
        channel_username: Channel to read, passed straight to
            ``client.get_messages``.
        client: Object providing ``get_messages(channel, limit=...)`` and
            ``download_media(media, filename)``; the calls are not awaited,
            so a synchronous client is expected.
        cfg: Configuration object providing ``get_random_file_name()`` and
            ``get_pic_path()``. Defaults to a fresh ``config()`` instance
            (the methods are instance methods and cannot be called on the
            class itself).
        limit: Maximum number of history records to inspect.

    Each file is stored as ``<pic path>/<YYYY-MM-DD>/<random name>``.
    """
    if cfg is None:
        cfg = config()
    for msgs in client.get_messages(channel_username, limit=limit):
        if msgs.media is None:
            continue
        salt = cfg.get_random_file_name()
        t_dir = time.strftime("%Y-%m-%d", time.localtime())
        # os.path.join supplies the separator between the picture directory
        # and the date folder even when the configured path has no trailing "/".
        filename = os.path.join(cfg.get_pic_path(), t_dir, str(salt))
        client.download_media(msgs.media, filename)
23 | 
if __name__ == '__main__':
    # Constructing the watcher builds the Telegram client and registers the
    # new-message download handler (see rt.tg_watchon_class).
    watcher = tg_watchon_class()

    # Only the real-time listener is launched, in its own process. A history
    # download could be added here as another
    # Process(target=get_media, args=(channel, watcher.get_client())).
    workers = [Process(target=watcher.start, args=())]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    print('(Press Ctrl+C to main thread)')
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/tools/disk_alert.py:
--------------------------------------------------------------------------------
 1 | import http
 2 | import json
 3 | import os
 4 | 
 5 | 
 6 | def send_ding_talk_robot(title, bash_line):
 7 |     import http.client
 8 |     conn = http.client.HTTPSConnection("oapi.dingtalk.com")
 9 |     payload = {
10 |         "msgtype": "markdown",
11 |         "markdown": {
12 |             "title": "### {}".format(title),
13 |             "text": "#### msg \n"
14 |                     "```\n"
15 |                     "{}"
16 | 
17 |                     "```"
18 |                     "\n".format(bash_line)
19 |         },
20 |         "at": {
21 |             "atMobiles": [
22 |                 '10086007009'
23 |             ],
24 |             "isAtAll": 'true',
25 |         },
26 |     }
27 |     headers = {
28 |         'content-type': "application/json",
29 |     }
30 |     String_textMsg = json.dumps(payload)
31 |     conn.request("POST", "/robot/send?access_token=dingding token",
32 |                  String_textMsg, headers)
33 |     res = conn.getresponse()
34 |     data = res.read()
35 |     print(data.decode("utf-8"))
36 | 
37 | 
if __name__ == '__main__':
    # Report 1: `du -sh` of every entry in data_online, reshaped by awk into
    # "| size | name |" rows.
    du_cmd = "cd /volumeUSB1/usbshare/py/ && du -sh data_online/* | awk -F' ' '{print \"| \" $1 \" | \" $2 \" |\"}'"
    disk_table = os.popen(du_cmd).read()
    disk_used = "{} \n{} \n ".format("大小（单位：字节 - 文件名) ", disk_table)
    send_ding_talk_robot("硬盘使用量", disk_used)

    # Report 2: `df -h` without tmpfs lines, first six columns comma-joined.
    df_cmd = "df -h | grep -v 'tmpfs' |awk '{print$1 \",\"$2\",\"$3\",\"$4\",\"$5\",\"$6}'"
    df_report = os.popen(df_cmd).read()
    send_ding_talk_robot("硬盘使用量", df_report)

    # Report 3: output of show_groupby.sh run from inside data_online.
    groupby_cmd = "cd /volumeUSB1/usbshare/py/data_online && bash show_groupby.sh"
    groupby_report = os.popen(groupby_cmd).read()
    send_ding_talk_robot("个类型文件磁盘占比", groupby_report)
50 | 


--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import time
 3 | 
 4 | 
 5 | class config:
 6 |     def __init__(self):
 7 |         from configparser import ConfigParser
 8 | 
 9 |         config = ConfigParser()
10 |         # 传入读取文件的地址，encoding文件编码格式，中文必须
11 |         config.read('zh_cn.config', encoding='UTF-8')
12 |         # 输出路径
13 |         self._path = config['message_download']['DATA_DIR']
14 |         self.picture_storage_path = config['message_download']['PIC_DIR']
15 |         self.proxy_addr = config['message_download']['PROXY_ADDR']
16 |         self.proxy_port = config['message_download']['PROXY_PORT']
17 |         self.API_ID = config['message_download']['API_ID']
18 |         self.API_HASH = config['message_download']['API_HASH']
19 |         self.TG_AUTH_FILE_NAME = config['message_download']['TG_AUTH_FILE_NAME']
20 | 
21 |     def getpath(self):
22 |         return self._path
23 | 
24 |     def get_TG_AUTH_FILE_NAME(self):
25 |         return self.TG_AUTH_FILE_NAME
26 | 
27 |     def get_API_HASH(self):
28 |         return self.API_HASH
29 | 
30 |     def get_API_ID(self):
31 |         return self.API_ID
32 | 
33 |     def get_pic_path(self):
34 |         return self.picture_storage_path
35 | 
36 |     def get_proxy_port(self):
37 |         return self.proxy_port
38 | 
39 |     def get_proxy_addr(self):
40 |         return self.proxy_addr
41 | 
42 |     def get_random_file_name(self):
43 |         H = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
44 |         salt = ''
45 |         for i in range(22):
46 |             salt += random.choice(H)
47 |         t_dir = time.strftime("%Y-%m-%d", time.localtime())
48 |         return salt
49 | 
50 | 
if __name__ == '__main__':
    # Manual smoke test: load the configuration and print a few values.
    # The proxy getters are get_proxy_addr/get_proxy_port; the class has no
    # get_socks5_* methods.
    c = config()
    print(c.getpath())
    print(c.get_pic_path())
    print(c.get_proxy_addr())
    print(c.get_proxy_port())


--------------------------------------------------------------------------------
/rt.py:
--------------------------------------------------------------------------------
 1 | from telethon import TelegramClient, sync, events, utils
 2 | 
 3 | from telethon.tl.functions.messages import ForwardMessagesRequest
 4 | from telethon.tl.functions.messages import SendMessageRequest
 5 | from telethon.tl.types.messages import Messages
 6 | from telethon.tl.functions.account import UpdateStatusRequest
 7 | from telethon.tl.functions.channels import GetChannelsRequest
 8 | from telethon.tl.functions.users import GetUsersRequest
 9 | 
10 | import random
11 | import logging
12 | import time
13 | import socks
14 | from multiprocessing import Process, cpu_count
15 | 
16 | import asyncio
17 | 
18 | from conf import config
19 | 
20 | # Printing download progress
def callback(current, total):
    """Print download progress as byte counts and a percentage.

    Args:
        current: Number of bytes downloaded so far.
        total: Total number of bytes; when it is 0 (or otherwise falsy) the
            percentage is reported as 0.00% instead of dividing by zero.
    """
    ratio = current / total if total else 0.0
    print('Downloaded', current, 'out of', total,
          'bytes: {:.2%}'.format(ratio))
24 | 
class tg_watchon_class:
    """Real-time downloader: saves the media of every new Telegram message.

    Construction reads the configuration, creates and starts a
    ``TelegramClient`` through the configured HTTP proxy, and registers a
    ``NewMessage`` handler that downloads each message's media below
    ``<DATA_DIR>/<YYYY-MM-DD>/``.
    """

    def __init__(self):
        cfg = config()
        self.cfg1 = cfg
        self.data_storage_path = cfg.getpath()
        self.api_id = cfg.get_API_ID()
        self.api_hash = cfg.get_API_HASH()

        self.client = TelegramClient(
            cfg.get_TG_AUTH_FILE_NAME(), self.api_id, self.api_hash,
            proxy=(socks.HTTP, cfg.get_proxy_addr(), int(cfg.get_proxy_port()))).start()

        @self.client.on(events.NewMessage)
        async def handler(event):
            # Runs once per incoming message.
            print("handler init success")
            filename = self._build_filename(event.media)
            await event.message.download_media(filename)

    def _build_filename(self, media):
        """Return the download target path for a message's media.

        The path is ``<data dir>/<YYYY-MM-DD>/<random name>``; when the
        media's string form carries a ``file_name='...'`` attribute, that
        name (spaces replaced by underscores) is appended after an
        underscore. The name comes from the remote sender, so only its last
        path component is used to keep the file inside the date directory.
        """
        import os
        import re

        salt = self.cfg1.get_random_file_name()
        t_dir = time.strftime("%Y-%m-%d", time.localtime())
        filename_temp = self.data_storage_path + '/' + str(t_dir) + '/' + str(salt)

        print("download - " + filename_temp)

        found = re.findall(r"file_name='(.+?)'", str(media))
        if not found:
            return filename_temp

        # Treat both separator styles as path separators, then drop any
        # directory part so names like "../../x" cannot escape the target dir.
        original = os.path.basename(str(found[0]).replace("\\", "/"))
        original = original.replace(" ", "_")
        if not original:
            return filename_temp
        return "{}_{}".format(filename_temp, original)

    def get_client(self):
        """Return the started Telegram client."""
        return self.client

    def start(self):
        """Block, processing updates, until the client disconnects."""
        print('(Press Ctrl+C to stop this)')
        self.client.run_until_disconnected()
65 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | !zh_cn.config
  6 | !zh_cn.config
  7 | 
  8 | # C extensions
  9 | *.so
 10 | .idea/
 11 | .idea/*
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 


--------------------------------------------------------------------------------
/tools/delete_dup.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import os
 3 | import time
 4 | import sys, getopt
 5 | 
 6 | 
 7 | 
 8 | from conf import config
 9 | 
10 | 
11 | 
12 | 
13 | 
def getmd5(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in 1 MiB chunks so large media files are not loaded
    into memory at once, and the handle is always closed.

    :param filename: path of the file
    :return: MD5 hex digest of the file
    """
    digest = hashlib.md5()
    with open(filename, 'rb') as handle:
        for chunk in iter(lambda: handle.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()


def main(argv):
    """Delete duplicate files (same size and same MD5) in one directory.

    Options:
        -h, --help     print usage and exit
        -i, --ifile    directory to scan
        -a, --aauto    scan today's ``<DATA_DIR>/<YYYY-MM-DD>`` directory taken
                       from the project configuration

    Only the directory itself is scanned (no recursion). Entries are visited
    in sorted name order, so of several identical files the first by name is
    kept. Files are hashed only when another file of the same size exists.
    """
    path = ''
    try:
        opts, _ = getopt.getopt(argv, "hi:a", ["help", "ifile=", "aauto="])
    except getopt.GetoptError:
        print('test.py -i <inputfile>')
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print('test.py -i <input dirs>')
            sys.exit()
        elif opt in ("-i", "--ifile"):
            path = arg
        elif opt in ("-a", "--aauto"):
            t_dir = time.strftime("%Y-%m-%d", time.localtime())
            # getpath() is an instance method, so a config object is needed.
            path = "{}/{}".format(config().getpath(), t_dir)
    if not path:
        # No directory selected: show usage instead of failing in listdir('').
        print('test.py -i <inputfile>')
        sys.exit(2)

    # size -> [path of the first file with that size (None once hashed),
    #          set of MD5 digests seen for that size]
    all_size = {}
    total_file = 0
    total_delete = 0
    start = time.time()
    for file in sorted(os.listdir(path)):
        # Every directory entry counts towards the total, files or not.
        total_file += 1
        real_path = os.path.join(path, file)
        if not os.path.isfile(real_path):
            continue
        size = os.stat(real_path).st_size
        entry = all_size.get(size)
        if entry is None:
            # First file of this size: remember it, hash it only if needed.
            all_size[size] = [real_path, set()]
            continue
        if entry[0] is not None:
            # A second file of this size appeared: hash the first one now so
            # the comparison is against its real content, not a placeholder.
            entry[1].add(getmd5(entry[0]))
            entry[0] = None
        new_md5 = getmd5(real_path)
        if new_md5 in entry[1]:
            # Same size and same digest as a kept file: duplicate.
            print('删除', real_path)
            os.remove(real_path)
            total_delete += 1
        else:
            entry[1].add(new_md5)
    end = time.time()
    time_last = end - start
    print('文件总数：', total_file)
    print('删除个数：', total_delete)
    print('耗时：', time_last, '秒')


if __name__ == '__main__':
    main(sys.argv[1:])


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## telethon_get_media
  2 | 
  3 | 
  4 | ### 分享文件相关
  5 | 
  6 | * 可以通过 https://github.com/uk0/file_encryption 加密后自释放分享 ：）Safe
  7 | 
  8 | ### 当前版本
  9 | 
 10 | * beta 1.0.0 
 11 | 
 12 | #### 功能
 13 | 
 14 | * 1.下载 Telegram 历史Media [频道id里面有自己改一下] `main.py`
 15 | 
 16 | * 2.实时下载所有频道里面的 Media [自己加入的所有频道] `rt.py`
 17 | 
 18 | * 3.下载获取历史与评论区 `his.py` 自己调用download media即可。
 19 | 
 20 | 
 21 | #### Env Python 3.6 
 22 | 
 23 |    * telethon
 24 |    * socks  ` pip install PySocks`
 25 |    * asyncio
 26 |       
 27 | 
 28 | #### 说明
 29 | 
 30 | ```bash
 31 | 
 32 | #1 https://my.telegram.org/auth 输入手机号申请 APIID
 33 | 
 34 | #2.直接把自己的API KEY 写入进去 运行程序会让你输入手机号，以及验证码。
 35 | 
 36 | #3.第一次运行需要输入手机号，停止后再启动就不需要了。
 37 | 
 38 | #4.纯属无聊。。。。。
 39 | 
 40 | ```
 41 | 
 42 | #### quick start
 43 | 
 44 | * 修改配置文件
 45 | 
 46 |   > `DELETE_DUP` 现在没有使用
 47 | 
 48 |   > 自己创建一个名字=`zh_cn.config`的文件和python脚本同级将以下内容稍作修改写入即可
 49 | 
 50 | ```config
 51 | [message_download]
 52 | PIC_DIR=/Users/firshme/Desktop/tmp
 53 | DATA_DIR=/Users/firshme/Desktop/tmp
 54 | DELETE_DUP=AUTO  
 55 | API_ID=100851
 56 | API_HASH=464f1f154c34c1f93057f3be
 57 | TG_AUTH_FILE_NAME=auto_download
 58 | PROXY_ADDR=127.0.0.1
 59 | PROXY_PORT=1089
 60 | ```
 61 | 
 62 | 
 63 | * 安装依赖
 64 | 
 65 | ```bash
 66 | pip install telethon
 67 | pip install PySocks
 68 | ```
 69 | 
 70 | * 启动
 71 | 
 72 | ```bash
 73 | sudo -u root /opt/miniconda3/bin/python3 main.py
 74 | 
 75 | ```
 76 | 
 77 | 
 78 | 
 79 | * 启动后 console 
 80 | 
 81 | ```bash
 82 | (Press Ctrl+C to stop this)
 83 | handler init success
 84 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/XfBFEXrBc18TJL9XjU4zcI
 85 | handler init success
 86 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/xIbFtL3zpDjImhujE8IaWX
 87 | handler init success
 88 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/DdNU5sqUv3B771R1Yr5aZt
 89 | handler init success
 90 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/UYH1CzsvgTQzyuTB2gjlKt
 91 | handler init success
 92 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/EcdhKSMTszWYFLtYlMdUGL
 93 | handler init success
 94 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/vm6Fbx1o1QR3u2VcpTK9HP
 95 | handler init success
 96 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/Wx1wj1BSmQTkdzne5nVehG
 97 | handler init success
 98 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/5hyks1pWPE5yt0ACuyyc3g
 99 | handler init success
100 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/5NqBudIhSLFeNGHdphxSPj
101 | handler init success
102 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/JIDBfFvplFxMA2ruXyaGb5
103 | 
104 | 
105 | # 查看文件夹
106 | admin@DS918:/volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06$ ls -al
107 | total 84920
108 | drwxr-xr-x 2 root  root      4096 Jul  6 23:34 .
109 | drwxr-xr-x 3 admin users     4096 Jul  6 23:26 ..
110 | -rw-r--r-- 1 root  root     22599 Jul  6 23:34 FrB1elMKv84c7pGQr7Dkmi.jpg
111 | -rw-r--r-- 1 root  root  16908288 Jul  6 23:34 UYH1CzsvgTQzyuTB2gjlKt_QMYxxx1271124695396634624-20200612_005838-vid1.mp4
112 | -rw-r--r-- 1 root  root  55574528 Jul  6 23:34 vm6Fbx1o1QR3u2VcpTK9HP_xxxxxxxxxx.mp4
113 | -rw-r--r-- 1 root  root  14417920 Jul  6 23:34 Wx1wj1BSmQTkdzne5nVehG_xxxxxxx.mp4
114 | 
115 | ```
116 | 
117 | 
118 | #### show_groupby.sh 使用
119 | 
120 | ```bash
121 | sh show_groupby.sh /path/to/dir
122 | 
123 | # 例如
124 | admin@DS918:/volume5/green_hdd/pysuper/telethon_get_media$ sh show_groupby.sh /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06
125 |          468KiB   11 jpg
126 |          699MiB    7 mp4
127 | ```
128 | 
129 | 
130 | 
131 | #### happy continue
132 | 
133 | * 先给个 `✨`，有问题直接提 issue 即可，看到就会修改。
134 | 
135 | #### tools
136 | 
137 | * 里面都是测试时写的统计脚本和工具，之前在 ARM 设备上跑的，后面整合以后再删除。
138 | 


--------------------------------------------------------------------------------
/his.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import asyncio
  3 | from telethon import TelegramClient, types
  4 | from telethon.errors import ChannelPrivateError, UsernameInvalidError, UsernameNotOccupiedError
  5 | from telethon.tl.functions.messages import GetHistoryRequest
  6 | from telethon.tl.functions.channels import GetFullChannelRequest
  7 | from telethon.tl.types import PeerChat, PeerChannel, InputPeerChat
  8 | 
  9 | from logger_config import setup_logger
 10 | logger = setup_logger()
 11 | #  支持获取评论区的内容。
 12 | class TgHistoryClass:
 13 |     def __init__(self, channel_id, limit, total_count_limit):
 14 |         # EasyImage 图床平台的 API URL 和 Token
 15 |         api_id = "xxxxxx"  # 替换为你的 API ID
 16 |         api_hash = "xxxxxx"
 17 |         username = "xxxx"
 18 | 
 19 |         # 创建客户端
 20 |         self.client = TelegramClient(username, api_id, api_hash)
 21 |         self.channel_id = channel_id
 22 |         if isinstance(self.channel_id, int):
 23 |             # 数字频道
 24 |             self.my_channel = PeerChannel(channel_id)
 25 |         self.limit = limit
 26 |         self.total_count_limit = total_count_limit
 27 | 
 28 |     def disconnection(self):
 29 |         # 这行会阻塞，直到客户端断连并且无法重连，或你主动停止
 30 |         self.client.disconnect()
 31 | 
 32 | 
 33 |     async def offline_msg_task_test(self):
 34 |         await self.client.start()
 35 |         try:
 36 |             if isinstance(self.channel_id, str):
 37 |                 self.my_channel = await self.client.get_entity(self.channel_id)
 38 |         except (UsernameInvalidError, UsernameNotOccupiedError) as e:
 39 |             logger.warning(f"频道 {self.channel_id} 不存在或无效: {e}")
 40 |             return
 41 |         except Exception as e:
 42 |             logger.error(f"获取频道 {self.channel_id} 时出错: {e}")
 43 |             return
 44 | 
 45 |         # 1. 拉频道历史消息
 46 |         all_messages = []
 47 |         offset_id = 0
 48 |         while True:
 49 |             try:
 50 |                 history = await self.client(GetHistoryRequest(
 51 |                     peer=self.my_channel,
 52 |                     offset_id=offset_id,
 53 |                     offset_date=None,
 54 |                     add_offset=0,
 55 |                     limit=self.limit,
 56 |                     max_id=0,
 57 |                     min_id=0,
 58 |                     hash=0
 59 |                 ))
 60 |                 await asyncio.sleep(random.uniform(3.5, 5.0))
 61 |             except ChannelPrivateError:
 62 |                 logger.warning(f"频道 {self.channel_id} 是私有的或无访问权限")
 63 |                 break
 64 |             except ConnectionError as e:
 65 |                 logger.warning(f"连接错误: {e}，等待 10 秒后重试")
 66 |                 await asyncio.sleep(10)
 67 |                 continue
 68 |             except Exception as e:
 69 |                 logger.error(f"获取频道 {self.channel_id} 历史消息时出错: {e}")
 70 |                 break
 71 | 
 72 |             if not history.messages:
 73 |                 break
 74 | 
 75 |             all_messages.extend(history.messages)
 76 |             offset_id = history.messages[-1].id
 77 |             if self.total_count_limit and len(all_messages) >= self.total_count_limit:
 78 |                 break
 79 | 
 80 |         logger.info(f"从频道 {self.channel_id} 获取了 {len(all_messages)} 条消息")
 81 | 
 82 |         # 2. 获取讨论组实体
 83 |         discussion_entity = None
 84 |         try:
 85 |             full = await self.client(GetFullChannelRequest(channel=self.my_channel))
 86 |             linked_id = getattr(full.full_chat, 'linked_chat_id', None)
 87 |             if linked_id:
 88 |                 discussion_entity = await self.client.get_entity(PeerChannel(linked_id))
 89 |                 logger.info(f"讨论组实体已获取: id={discussion_entity.id}, type={type(discussion_entity)}")
 90 |             else:
 91 |                 logger.info(f"频道 {self.channel_id} 未开启评论功能（无讨论组）。")
 92 |         except Exception as e:
 93 |             logger.warning(f"获取频道讨论组时出错: {e}")
 94 | 
 95 |         if discussion_entity:
 96 |             logger.info("开始扫描讨论组消息，建立转发帖映射...")
 97 |             async for dmsg in self.client.iter_messages(discussion_entity, limit=120): # limit可调整
 98 |                 if dmsg.media:
 99 |                     all_messages.append(dmsg)
100 |         # 打印并下载原始消息及其评论
101 |         try:
102 |             for msg in all_messages:
103 |                 if msg.media:
104 |                     await self.client.download_media(msg, file='./downloads/')
105 |                     logger.info(f"下载消息 {msg.id} 的 media: {msg.media}")
106 |                 print(f"【消息 {msg.id}】: {msg}")
107 |         except Exception as e:
108 |             logger.error(f"处理评论区时出错: {e}")
109 | 
110 |         await self.client.disconnect()
111 |         logger.info("客户端已断开连接")
if __name__ == '__main__':
    # Example usage.
    channel_id = 'hao123'  # replace with your channel id
    limit = 100  # messages requested per call
    total_count_limit = 2  # 0 means no limit on the total

    tg_history = TgHistoryClass(
        channel_id,
        limit,
        total_count_limit,
    )

    # Drive the coroutine to completion on the current event loop.
    loop = asyncio.get_event_loop()
    task = tg_history.offline_msg_task_test()
    loop.run_until_complete(task)

    tg_history.disconnection()
125 | 


--------------------------------------------------------------------------------
/tools/pycrontab.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | import os, time, uuid, platform, json, codecs
  3 | import logging
  4 | from multiprocessing import Process, freeze_support, Manager
  5 | from datetime import date, datetime, timedelta
  6 | from subprocess import Popen, PIPE
  7 | 
  8 | 
  9 | __all__ = ['crontab', 'crontab_run']
 10 | 
 11 | current_path = os.path.dirname(os.path.abspath(__file__))
 12 | 
 13 | decode = 'gb2312' if platform.system() == 'Windows' else 'utf-8'
 14 | 
 15 | 
 16 | ########################################################################
 17 | class Job(object):
 18 |     """"""
 19 | 
 20 |     def __init__(self, script,script_param, executor, crontab):
 21 |         """Constructor"""
 22 |         self.job_id = uuid.uuid1().hex
 23 |         self.script = script
 24 |         self.script_param = script_param
 25 |         self.executor = executor
 26 |         self.add_time = datetime.now().replace(microsecond=0)
 27 |         self.next_time = None
 28 |         self.log_file = None
 29 |         self.log_file_timestamp = date.today().strftime('%Y%m%d')
 30 |         self.log_file_suffix = '-{timestamp}-{sequence}.log'
 31 |         self.log_file_sequence = 1
 32 |         self.logger = None
 33 |         self.status = 1 # -1:结束; 1：运行中
 34 |         self.method = crontab._method
 35 |         self.year = crontab._year
 36 |         self.month = crontab._month
 37 |         self.day = crontab._day
 38 |         self.hour = crontab._hour
 39 |         self.minute = crontab._minute
 40 |         self.second = crontab._second
 41 |         self.granula = crontab._granula
 42 |         self.begin_time = crontab._begin_time
 43 |         self.end_time = crontab._end_time
 44 |         self.gen_next_time()
 45 |         self.log()
 46 | 
    def gen_next_time(self, init=True):
        """Compute ``self.next_time`` (and possibly ``self.status``) for this job.

        Behaviour depends on ``self.method``:

        * ``'fix-all'``: ``next_time`` is the single datetime built from the
          year..second fields, and ``status`` is set to -1.
        * ``'fix-part'``: starting from the current ``next_time`` (or
          ``begin_time``, or ``add_time``), the fields finer than
          ``self.granula`` are overwritten with the configured values; the
          result is moved one granula unit forward when it lies before
          ``begin_time``, when ``init`` is False, or when it is earlier than
          now.
        * anything else (interval): the first call seeds ``next_time`` from
          ``begin_time`` or ``add_time``; later calls advance it by the first
          non-zero of year, month, day, hour, minute, second.

        In the fix-part branch and in the advancing interval branch,
        ``status`` becomes -1 once ``next_time`` passes ``end_time``;
        otherwise, while ``next_time`` is not in the future, the method calls
        itself again.

        :param init: when False, the fix-part branch always steps one granula
            unit forward. The recursive calls below use the default (True).
        """
        if self.method == 'fix-all':
            # One fixed point in time; status is set to -1 right away.
            self.next_time = datetime(year=self.year,
                                      month=self.month,
                                      day=self.day,
                                      hour=self.hour,
                                      minute=self.minute,
                                      second=self.second)
            self.status = -1
        elif self.method == 'fix-part':
            now = datetime.now().replace(microsecond=0)
            # Seed next_time on the first call.
            if not self.next_time:
                if self.begin_time:
                    self.next_time = self.begin_time
                else:
                    self.next_time = self.add_time

            if self.granula == 'year':
                self.next_time = self.next_time.replace(month=self.month,
                                       day=self.day, hour=self.hour, minute=self.minute, second=self.second)
                # Avoid skipping this year's run the first time next_time is
                # computed. E.g. with begin_time='2018-06-01 00:00:00' and a
                # job scheduled for every July 1st, the condition below keeps
                # the first next_time from skipping July of the current year.
                if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now:
                    self.next_time = self.next_time.replace(year=self.next_time.year + 1)
            elif self.granula == 'month':
                self.next_time = self.next_time.replace(hour=self.hour, minute=self.minute, second=self.second)
                if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now:
                    # Step to the next month, rolling over into January.
                    if self.next_time.month == 12:
                        self.next_time = self.next_time.replace(year=self.next_time.year + 1, month=1)
                    else:
                        self.next_time = self.next_time.replace(month=self.next_time.month + 1)

                if self.day > 0:
                    self.next_time = self.next_time.replace(day=self.day)
                else:
                    # day <= 0 is counted from the month's end: 0 is the last
                    # day, -1 the day before it, and so on.
                    import calendar
                    days = calendar.monthrange(self.next_time.year, self.next_time.month)[1]
                    self.next_time = self.next_time.replace(day=days + self.day)

            elif self.granula == 'day':
                self.next_time = self.next_time.replace(hour=self.hour, minute=self.minute, second=self.second)
                if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now:
                    self.next_time += timedelta(days=1)
            elif self.granula == 'hour':
                self.next_time = self.next_time.replace(minute=self.minute, second=self.second)
                if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now:
                    self.next_time += timedelta(hours=1)
            elif self.granula == 'minute':
                self.next_time = self.next_time.replace(second=self.second)
                if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now:
                    self.next_time += timedelta(minutes=1)

            # Past end_time: set status to -1. Still not in the future: recompute.
            if self.end_time and self.next_time > self.end_time:
                self.status = -1
            elif self.next_time <= datetime.now():
                self.gen_next_time()

        else: # interval
            if not self.next_time:
                # First call: start at begin_time if given, else at add_time.
                if self.begin_time:
                    self.next_time = self.begin_time
                else:
                    self.next_time = self.add_time
            else:
                # Advance by the first non-zero field. The year and month
                # branches always step by exactly one unit, whatever value
                # the field holds.
                # NOTE(review): if every field is zero nothing advances, and
                # the recursion below would not terminate — confirm callers
                # always set one.
                if self.year:
                    self.next_time = self.next_time.replace(year=self.next_time.year+1)
                elif self.month:
                    if self.next_time.month == 12:
                        self.next_time = self.next_time.replace(year=self.next_time.year+1, month=1)
                    else:
                        self.next_time = self.next_time.replace(month=self.next_time.month+1)
                elif self.day:
                    self.next_time += timedelta(days=self.day)
                elif self.hour:
                    self.next_time += timedelta(hours=self.hour)
                elif self.minute:
                    self.next_time += timedelta(minutes=self.minute)
                elif self.second:
                    self.next_time += timedelta(seconds=self.second)


                # Past end_time: set status to -1. Otherwise keep stepping
                # (recursively) until next_time lies in the future.
                if self.end_time and self.next_time > self.end_time:
                    self.status = -1
                elif self.next_time <= datetime.now():
                    self.gen_next_time()
134 | 
135 | 
136 |     def gen_log_sequence(self):
137 |         # 计算日志大小
138 |         log_file = self.log_file.format(timestamp=self.log_file_timestamp, sequence=self.log_file_sequence)
139 | 
140 |         if not os.path.exists(self.log_file):
141 |             self.log_file_sequence = 1
142 |         else:
143 |             if os.path.getsize(log_file) > self.log_size * 1024 * 1024:
144 |                 self.log_file_sequence += 1
145 | 
146 |     def log(self, path=None, prefix=None, size=None):
147 |         """"""
148 |         if path:
149 |             self.log_path = path
150 |         else:
151 |             self.log_path = os.path.join(current_path, 'log')
152 |             if not os.path.exists(self.log_path):
153 |                 os.mkdir(self.log_path)
154 | 
155 |         if prefix:
156 |             self.log_file = os.path.join(self.log_path, str(prefix) + self.log_file_suffix)
157 |         else:
158 |             self.log_file = os.path.join(self.log_path,
159 |                                          os.path.splitext(os.path.basename(self.script))[0] + self.log_file_suffix)
160 | 
161 |         if size:
162 |             self.log_size = size
163 |         else:
164 |             self.log_size = 10
165 | 
166 |     def _logger(self, debug=False):
167 |         """"""
168 |         log_file = self.log_file.format(timestamp=self.log_file_timestamp, sequence=self.log_file_sequence)
169 | 
170 |         logger = logging.getLogger(log_file)
171 |         logger.setLevel(logging.DEBUG)
172 |         if not logger.handlers:
173 |             filehandler = logging.FileHandler(log_file, encoding='utf-8')
174 |             filehandler.setLevel(logging.DEBUG)
175 | 
176 |             consolehandler = logging.StreamHandler()
177 |             consolehandler.setLevel(logging.DEBUG if debug else logging.ERROR)
178 | 
179 |             formatter = logging.Formatter("%(asctime)s - %(filename)s - %(levelname)s - %(message)s")
180 | 
181 |             filehandler.setFormatter(formatter)
182 |             consolehandler.setFormatter(formatter)
183 | 
184 |             logger.addHandler(filehandler)
185 |             logger.addHandler(consolehandler)
186 | 
187 |         return logger
188 | 
189 |     def run(self):
190 |         """"""
191 |         self.logger = self._logger()
192 |         self.logger.info('start running script: {} params : {}'.format(self.script,self.script_param))
193 |         try:
194 |             cmd = '{} {} {}'.format(self.executor, self.script,self.script_param)
195 |             p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
196 |             out, err = p.communicate()
197 |             if err or p.returncode != 0:
198 |                 self.logger.error(
199 |                     "The command finished with error: \n"
200 |                     + err.decode(decode).replace('\r', '').rstrip('\n')
201 |                 )
202 |             else:
203 |                 self.logger.info(
204 |                     "The stdout of the command: "
205 |                     + out.decode(decode).replace('\r', '').rstrip('\n')
206 |                 )
207 |         except Exception as e:
208 |             self.logger.error(
209 |                 "The command finished with error: " + e.args[0] + e.args[1]
210 |             )
211 |         finally:
212 |             self.logger.info('finish running script: {}'.format(self.script))
213 | 
214 |     def __lt__(self, other):
215 |         """"""
216 |         return self.next_time < other.next_time
217 | 
218 |     def __str__(self):
219 |         return '<Job %r, method %r, next_time %s, status %s>' % (self.script, self.method, self.next_time, self.status)
220 | 
221 | 
222 | ########################################################################
class Crontab(object):
    """Fluent schedule builder plus the polling loop that dispatches due jobs.

    A schedule is described by chaining ``every()``, ``at()`` or
    ``interval()``, and optionally ``begin()`` / ``end()``; ``execute()``
    finishes the chain by wrapping the collected settings in a ``Job`` and
    resetting the builder for the next chain.
    """
    # Registered jobs. Defined on the class, so execute() appends to a list
    # shared by all instances; loop() later rebinds it on the instance.
    _jobs = []
    # JSON file to which flushJobs() writes the job states.
    job_config_file = os.path.join(current_path, 'jobs.conf')

    def __init__(self):
        """Reset every builder field to its unset state."""
        self._method = ''
        self._year = None
        self._month = None
        self._day = None
        self._hour = None
        self._minute = None
        self._second = None
        self._begin_time = None
        self._end_time = None
        self._interval = None
        self._granula = None
        self._granulalist = ['year', 'month', 'day', 'hour', 'minute', 'second']

    @staticmethod
    def _check(*conditions):
        """Raise AssertionError unless every condition is true.

        Used instead of ``assert`` so the validation in ``at()`` is not
        stripped when Python runs with ``-O``.
        """
        if not all(conditions):
            raise AssertionError

    def every(self, granula='day'):
        """Select the repetition granularity; returns ``self`` for chaining.

        Raises:
            Exception: if ``granula`` is not one of year, month, day, hour,
                minute, second.
        """
        if granula not in self._granulalist:
            raise Exception("granula必须在{}中".format(','.join(self._granulalist)))

        self._granula = granula
        return self

    def at(self, **kwargs):
        """Fix the point in time at which the job runs; returns ``self``.

        Without a preceding ``every()`` all six of year, month, day, hour,
        minute and second must be passed (method ``'fix-all'``). After
        ``every(granula)`` only the fields finer than the granularity are
        read, defaulting to month=1, day=1 and 0 for hour/minute/second
        (method ``'fix-part'``). In that mode ``day`` may also be -5..-1.
        NOTE(review): negative days presumably count back from the end of the
        month; confirm in ``Job.gen_next_time``.

        Raises:
            Exception: if ``at``/``interval`` was already used in this chain,
                if an unknown keyword is passed in fix-all mode, or if the
                granularity is ``'second'``.
            AssertionError: if a value is out of range, or fix-all mode does
                not receive exactly six keywords.
        """
        if self._method:
            raise Exception("不可重用interval和at方法.")

        if not self._granula:
            self._check(len(kwargs) == 6)
            for k in kwargs:
                if k not in self._granulalist:
                    raise Exception("{}必须在{}中".format(k,','.join(self._granulalist)))
                setattr(self, '_' + k, kwargs[k])
            self._check(12 >= self._month >= 1,
                        31 >= self._day >= 1,
                        23 >= self._hour >= 0,
                        59 >= self._minute >= 0,
                        59 >= self._second >= 0)
            self._method = 'fix-all'
            return self

        # Reject the unsupported combination before touching any state.
        if self._granula == 'second':
            raise Exception("every('second')时不支持at,可使用interval!")

        self._method = 'fix-part'

        if self._granula == 'year':
            self._month = kwargs.get('month', 1)
            self._day = kwargs.get('day', 1)
            self._hour = kwargs.get('hour', 0)
            self._minute = kwargs.get('minute', 0)
            self._second = kwargs.get('second', 0)
            self._check(12 >= self._month >= 1,
                        31 >= self._day >= -5 and self._day != 0,
                        23 >= self._hour >= 0,
                        59 >= self._minute >= 0,
                        59 >= self._second >= 0)

        elif self._granula == 'month':
            self._day = kwargs.get('day', 1)
            self._hour = kwargs.get('hour', 0)
            self._minute = kwargs.get('minute', 0)
            self._second = kwargs.get('second', 0)
            self._check(31 >= self._day >= -5 and self._day != 0,
                        23 >= self._hour >= 0,
                        59 >= self._minute >= 0,
                        59 >= self._second >= 0)

        elif self._granula == 'day':
            self._hour = kwargs.get('hour', 0)
            self._minute = kwargs.get('minute', 0)
            self._second = kwargs.get('second', 0)
            self._check(23 >= self._hour >= 0,
                        59 >= self._minute >= 0,
                        59 >= self._second >= 0)

        elif self._granula == 'hour':
            self._minute = kwargs.get('minute', 0)
            self._second = kwargs.get('second', 0)
            self._check(59 >= self._minute >= 0,
                        59 >= self._second >= 0)

        elif self._granula == 'minute':
            self._second = kwargs.get('second', 0)
            self._check(59 >= self._second >= 0)

        return self

    def interval(self, num):
        """Run the job every ``num`` units of the chosen granularity; returns ``self``.

        The count is stored in the field named after the granularity, e.g.
        ``every('hour').interval(3)`` stores 3 in ``_hour``.

        Raises:
            Exception: if ``at``/``interval`` was already used in this chain,
                if ``every()`` was not called first, or if ``num`` is not a
                positive integer.
        """
        if self._method:
            raise Exception("不可重用interval和at方法.")
        if not self._granula:
            raise Exception("必须先使用every方法指定频率粒度")
        # Zero is rejected too: a zero-length interval never advances.
        if not isinstance(num, int) or num <= 0:
            raise Exception("参数num必须为大于0的整数")
        self._method = 'interval'
        setattr(self, '_' + self._granula, num)
        return self

    def begin(self, dtime):
        """Set the start time (truncated to whole seconds); returns ``self``.

        Raises:
            Exception: if ``dtime`` is not a ``datetime``.
        """
        if not isinstance(dtime, datetime):
            raise Exception("dtime参数必须为datetime类型")
        self._begin_time = dtime.replace(microsecond=0)
        return self

    def end(self, dtime):
        """Set the end time (truncated to whole seconds); returns ``self``.

        Raises:
            Exception: if ``dtime`` is not a ``datetime``.
        """
        if not isinstance(dtime, datetime):
            raise Exception("dtime参数必须为datetime类型")
        self._end_time = dtime.replace(microsecond=0)
        return self

    def execute(self, script,script_param, executor='python'):
        """Register a job for ``script`` using the settings collected so far.

        Args:
            script: Path of the script to run; it must exist.
            script_param: Parameter value handed to the ``Job``.
            executor: Program used to run the script. The default ``python``
                is only accepted for ``.py`` scripts.

        Raises:
            Exception: if the script does not exist, or a non-``.py`` script
                is paired with the ``python`` executor.
        """
        if not os.path.exists(script):
            raise Exception("未找到该脚本:{}".format(script))
        if os.path.splitext(script)[1].lower() != '.py' and executor.lower() =='python':
            raise Exception("必须提供正确的执行程序，如python, java, bash等")
        j = Job(script,script_param, executor, self)
        self._jobs.append(j)
        # Clear the builder fields so the next chain starts from scratch.
        self.__init__()

    def __getstate__(self):
        """Pickle only the job list; builder fields are not carried over."""
        return self._jobs

    def __setstate__(self, state):
        """Restore the job list produced by ``__getstate__``."""
        self._jobs = state


    def flushJobs(self, init=False):
        """Write the job states to ``job_config_file`` as JSON.

        With ``init=True`` the file is rewritten from the attributes of the
        currently registered jobs. Otherwise the existing file is read back
        and every stored entry whose ``job_id`` no longer belongs to a
        registered job gets ``status`` -1 before the file is written again.
        """
        if init:
            json_jobs = [j.__dict__ for j in self._jobs]
        else:
            with codecs.open(self.job_config_file, 'r', encoding='utf-8') as f:
                json_jobs = json.loads(f.read())
            for jj in json_jobs:
                # Only entries that are no longer scheduled are marked as
                # finished; jobs still in self._jobs keep their stored status.
                if not any(jj['job_id'] == j.job_id for j in self._jobs):
                    jj['status'] = -1
        with codecs.open(self.job_config_file, 'w', encoding='utf-8') as f:
            json.dump(json_jobs, f, indent=4, ensure_ascii=False, separators=(',', ': '), cls=DateEncoder)


    def loop(self, queue, debug):
        """Poll the jobs once per second and enqueue those that are due. Never returns.

        Args:
            queue: Queue onto which due jobs are put.
            debug: When true, each job and each enqueue is logged through the
                job's own logger.
        """
        self.flushJobs(init=True)

        # Start the window 10 seconds in the past so jobs that became due just
        # before the loop started are still picked up on the first pass.
        self.last_loop_time = datetime.now().replace(microsecond=0) - timedelta(seconds=10)
        while True:
            now = datetime.now().replace(microsecond=0)

            # Drop jobs whose status is no longer 1 and record that on disk.
            pre_job_count = len(self._jobs)
            self._jobs = [j for j in self._jobs if j.status == 1]
            if pre_job_count != len(self._jobs):
                self.flushJobs()

            for j in sorted(self._jobs):
                if debug:
                    j._logger(debug).info("{}".format(str(j)))

                if self.last_loop_time < j.next_time <= now:
                    # Due within (last pass, now]: dispatch and reschedule.
                    if debug:
                        j._logger(debug).info("put job into queue: {}".format(str(j)))
                    queue.put(j)
                    j.gen_next_time(init=False)
                    j.gen_log_sequence()
                elif j.next_time < self.last_loop_time:
                    # Missed its slot: reschedule without running.
                    j.gen_next_time(init=False)

            self.last_loop_time = now
            time.sleep(1)
401 | 
402 | 
class DateEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime`` values as ``YYYY-MM-DD HH:MM:SS`` text."""

    def default(self, obj):
        """Convert values the base encoder cannot serialize.

        ``None`` becomes an empty string and ``datetime`` a formatted string;
        anything else is handed to ``json.JSONEncoder.default``, which raises
        ``TypeError``.
        """
        if obj is None:
            return ""
        if isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%d %H:%M:%S')
        return super(DateEncoder, self).default(obj)
410 | 
def first_runner(queue):
    """Worker loop: wait for the next job on ``queue`` and run it, forever."""
    while True:
        queue.get().run()
415 | 
416 | 
def second_runner(queue):
    """Second worker loop: wait for the next job on ``queue`` and run it, forever."""
    while True:
        queue.get().run()
421 | 
422 | 
# Module-level Crontab instance: importing scripts register jobs through it
# (crontab.every(...)...execute(...)) and crontab_run() runs its loop().
crontab = Crontab()
424 | 
def crontab_run(debug=False):
    """Start the scheduler loop and two runner processes and keep them alive.

    One process runs ``crontab.loop`` and two run ``first_runner`` /
    ``second_runner``; all share a manager queue. Every 5 seconds any process
    that has died is replaced by a fresh one with the same name and target.
    This function does not return.

    Args:
        debug: Passed on to ``crontab.loop``.
    """
    freeze_support()
    with Manager() as manager:
        queue = manager.Queue()
        ps = [
            Process(target=crontab.loop, name="crontab.loop", args=(queue, debug)),
            Process(target=first_runner, name="first_runner", args=(queue,)),
            Process(target=second_runner, name="second_runner", args=(queue,))
        ]

        for p in ps:
            p.daemon = True
            p.start()

        while True:
            time.sleep(5)
            # Replace dead processes in their own slot. Removing from and
            # appending to the list while iterating it would skip the entry
            # that follows a dead process.
            for i, p in enumerate(ps):
                if p.is_alive():
                    continue
                print("terminate: {} {}".format(p.pid, p.name))
                if p.name == 'crontab.loop':
                    target, args = crontab.loop, (queue, debug)
                else:
                    # Runner processes are named after their target function.
                    target, args = globals()[p.name], (queue,)
                replacement = Process(target=target, name=p.name, args=args)
                replacement.daemon = True
                ps[i] = replacement
                replacement.start()
452 | 
453 | 
# Start the scheduler (with debug off) when this module is executed as a script.
if __name__ == '__main__':
    crontab_run()
456 | 
457 | 


--------------------------------------------------------------------------------