├── show_groupby.sh ├── tools ├── task.py ├── watch_dog.sh ├── disk_alert.py ├── delete_dup.py └── pycrontab.py ├── main.py ├── conf.py ├── rt.py ├── .gitignore ├── README.md └── his.py /show_groupby.sh: -------------------------------------------------------------------------------- 1 | cd $1 && find . -name '?*.*' -type f -printf '%b.%f\0' | 2 | awk -F . -v RS='\0' ' 3 | {s[$NF] += $1; n[$NF]++} 4 | END {for (e in s) printf "%15d %4d %s\n", s[e]*512, n[e], e}' | 5 | sort -n | numfmt --to=iec-i --suffix=B 6 | -------------------------------------------------------------------------------- /tools/task.py: -------------------------------------------------------------------------------- 1 | from pycrontab import crontab, crontab_run 2 | print("job init success") 3 | script2 = '/volumeUSB1/usbshare/py/delete_dup.py' 4 | crontab.every('hour').interval(3).execute(script2," -a") 5 | # 全路径 6 | script1 = '/volumeUSB1/usbshare/py/disk_alert.py' 7 | crontab.every('minute').interval(30).execute(script1,None) 8 | 9 | crontab_run(debug=True) 10 | -------------------------------------------------------------------------------- /tools/watch_dog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | basepath=$(cd `dirname $0`; pwd) 4 | 5 | cd $basepath 6 | 7 | cleanup (){ 8 | echo "kill main.py task.py" 9 | ps -ef | grep -E 'main.py|task.py'| awk '{print$2}' | xargs kill -9 10 | exit 0 11 | 12 | } 13 | # 监控信号量 14 | trap cleanup SIGINT SIGTERM 15 | 16 | #启动 task 17 | nohup python task.py 2>&1 > /dev/null & 18 | 19 | # 循环监控 20 | for ((;;)) do 21 | status=$(ps -ef | grep main.py | grep -v 'grep' |grep -v 'du*' | wc -l); 22 | #echo $status; 23 | if [ $status -eq 0 ] 24 | then 25 | echo "nohup python main.py &" 26 | nohup python main.py & 27 | sleep 15 ; 28 | else 29 | echo "get media is running" 30 | fi 31 | 32 | sleep 3 ; 33 | done -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from telethon import TelegramClient, sync 2 | import random 3 | import time 4 | import socks 5 | from multiprocessing import Process ,cpu_count 6 | import os 7 | from rt import tg_watchon_class 8 | from conf import config 9 | import logging 10 | 11 | # 下载 history 不是实时监听 实时监听在 `rt` 12 | 13 | def get_media(channel_username,client): 14 | # myself = client.get_me() 15 | # print(channel_username) 16 | # limit = 1000 history 1000 records 17 | for msgs in client.get_messages(channel_username, limit=1000): 18 | if msgs.media is not None: 19 | salt = config.get_random_file_name() 20 | t_dir = time.strftime("%Y-%m-%d", time.localtime()) 21 | filename = config.get_pic_path() + str(t_dir) + '/' + str(salt) 22 | client.download_media(msgs.media, filename) 23 | 24 | if __name__ == '__main__': 25 | 26 | t = tg_watchon_class() 27 | p_list = [] 28 | # for xx in ['hao123']: 29 | # p_list.append(Process(target=get_media, args=('%s' % xx,t.get_client(),))) 30 | 31 | # 独立启动监听 32 | p_list.append(Process(target=t.start, args=())) 33 | for xx in p_list: 34 | xx.start() 35 | for xx in p_list: 36 | xx.join() 37 | print('(Press Ctrl+C to main thread)') 38 | 39 | 40 | -------------------------------------------------------------------------------- /tools/disk_alert.py: -------------------------------------------------------------------------------- 1 | import http 2 | import json 3 | import os 4 | 5 | 6 | def send_ding_talk_robot(title, bash_line): 7 | import http.client 8 | conn = http.client.HTTPSConnection("oapi.dingtalk.com") 9 | payload = { 10 | "msgtype": "markdown", 11 | "markdown": { 12 | "title": "### {}".format(title), 13 | "text": "#### msg \n" 14 | "```\n" 15 | "{}" 16 | 17 | "```" 18 | "\n".format(bash_line) 19 | }, 20 | "at": { 21 | "atMobiles": [ 22 | '10086007009' 23 | ], 24 | "isAtAll": 'true', 25 | }, 26 | } 27 | headers = { 28 | 'content-type': "application/json", 29 | } 30 | String_textMsg = json.dumps(payload) 31 | conn.request("POST", "/robot/send?access_token=dingding token", 32 | String_textMsg, headers) 33 | res = conn.getresponse() 34 | data = res.read() 35 | print(data.decode("utf-8")) 36 | 37 | 38 | if __name__ == '__main__': 39 | disk_table = os.popen( 40 | "cd /volumeUSB1/usbshare/py/ && du -sh data_online/* | awk -F' ' '{print \"| \" $1 \" | \" $2 \" |\"}'").read() 41 | # disk_table = os.popen("cd /volumeUSB1/usbshare/py/data_online && du -sh * | awk -F' ' '{ print $1 \" - \" $2 \n }'").read() 42 | disk_used = "{} \n{} \n ".format("大小(单位:字节 - 文件名) ", disk_table); 43 | 44 | send_ding_talk_robot("硬盘使用量", disk_used) 45 | 46 | send_ding_talk_robot("硬盘使用量", os.popen( 47 | "df -h | grep -v 'tmpfs' |awk '{print$1 \",\"$2\",\"$3\",\"$4\",\"$5\",\"$6}'").read()) 48 | 49 | send_ding_talk_robot("个类型文件磁盘占比", os.popen("cd /volumeUSB1/usbshare/py/data_online && bash show_groupby.sh").read()) 50 | -------------------------------------------------------------------------------- /conf.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | 4 | 5 | class config: 6 | def __init__(self): 7 | from configparser import ConfigParser 8 | 9 | config = ConfigParser() 10 | # 传入读取文件的地址,encoding文件编码格式,中文必须 11 | config.read('zh_cn.config', encoding='UTF-8') 12 | # 输出路径 13 | self._path = config['message_download']['DATA_DIR'] 14 | self.picture_storage_path = config['message_download']['PIC_DIR'] 15 | self.proxy_addr = config['message_download']['PROXY_ADDR'] 16 | self.proxy_port = config['message_download']['PROXY_PORT'] 17 | self.API_ID = config['message_download']['API_ID'] 18 | self.API_HASH = config['message_download']['API_HASH'] 19 | self.TG_AUTH_FILE_NAME = config['message_download']['TG_AUTH_FILE_NAME'] 20 | 21 | def getpath(self): 22 | return self._path 23 | 24 | def get_TG_AUTH_FILE_NAME(self): 25 | return self.TG_AUTH_FILE_NAME 26 | 27 | def get_API_HASH(self): 28 | return self.API_HASH 29 | 30 | def get_API_ID(self): 31 | return self.API_ID 32 | 33 | def get_pic_path(self): 34 | return self.picture_storage_path 35 | 36 | def get_proxy_port(self): 37 | return self.proxy_port 38 | 39 | def get_proxy_addr(self): 40 | return self.proxy_addr 41 | 42 | def get_random_file_name(self): 43 | H = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789' 44 | salt = '' 45 | for i in range(22): 46 | salt += random.choice(H) 47 | t_dir = time.strftime("%Y-%m-%d", time.localtime()) 48 | return salt 49 | 50 | 51 | if __name__ == '__main__': 52 | c = config() 53 | print(c.getpath()) 54 | print(c.get_pic_path()) 55 | print(c.get_socks5_addr()) 56 | print(c.get_socks5_port()) -------------------------------------------------------------------------------- /rt.py: -------------------------------------------------------------------------------- 1 | from telethon import TelegramClient, sync, events, utils 2 | 3 | from telethon.tl.functions.messages import ForwardMessagesRequest 4 | from telethon.tl.functions.messages import SendMessageRequest 5 | from telethon.tl.types.messages import Messages 6 | from telethon.tl.functions.account import UpdateStatusRequest 7 | from telethon.tl.functions.channels import GetChannelsRequest 8 | from telethon.tl.functions.users import GetUsersRequest 9 | 10 | import random 11 | import logging 12 | import time 13 | import socks 14 | from multiprocessing import Process, cpu_count 15 | 16 | import asyncio 17 | 18 | from conf import config 19 | 20 | # Printing download progress 21 | def callback(current, total): 22 | print('Downloaded', current, 'out of', total, 23 | 'bytes: {:.2%}'.format(current / total)) 24 | 25 | class tg_watchon_class: 26 | 27 | def __init__(self): 28 | cfg = config() 29 | self.cfg1 = cfg 30 | self.data_storage_path = cfg.getpath() 31 | self.api_id = cfg.get_API_ID() 32 | self.api_hash = cfg.get_API_HASH() 33 | 34 | self.client = TelegramClient(cfg.get_TG_AUTH_FILE_NAME(), self.api_id, self.api_hash, 35 | proxy=(socks.HTTP,cfg.get_proxy_addr(), int(cfg.get_proxy_port()))).start() 36 | 37 | @self.client.on(events.NewMessage) 38 | async def handler(event): 39 | print("handler init success") 40 | ''' 41 | print('sender: ' + str(event.input_sender) + 'to: ' + str(event.message.to_id)) 42 | ''' 43 | salt = self.cfg1.get_random_file_name() 44 | t_dir = time.strftime("%Y-%m-%d", time.localtime()) 45 | filename_temp = self.data_storage_path + '/' + str(t_dir) + '/' + str(salt) 46 | 47 | print("download - " + filename_temp) 48 | 49 | import re 50 | filename_ = re.findall(r"file_name='(.+?)'", str(event.media)) # 51 | # print(str(event.media)) 52 | if len(filename_) > 0: 53 | filename = "{}_{}".format(filename_temp, str(filename_[0]).replace(" ", "_")) 54 | else: 55 | filename = filename_temp 56 | await event.message.download_media(filename) 57 | 58 | 59 | def get_client(self): 60 | return self.client 61 | 62 | def start(self): 63 | print('(Press Ctrl+C to stop this)') 64 | self.client.run_until_disconnected() 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | !zh_cn.config 6 | !zh_cn.config 7 | 8 | # C extensions 9 | *.so 10 | .idea/ 11 | .idea/* 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /tools/delete_dup.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | import time 4 | import sys, getopt 5 | 6 | 7 | 8 | from conf import config 9 | 10 | 11 | 12 | 13 | 14 | def getmd5(filename): 15 | """ 16 | 获取文件 md5 码 17 | :param filename: 文件路径 18 | :return: 文件 md5 码 19 | """ 20 | file_txt = open(filename, 'rb').read() 21 | # 调用一个md5对象 22 | m = hashlib.md5(file_txt) 23 | # hexdigest()方法来获取摘要(加密结果) 24 | return m.hexdigest() 25 | 26 | 27 | def main(argv): 28 | path = '' 29 | # 文件夹路径 30 | try: 31 | opts, args = getopt.getopt(argv, "hi:a", ["help","ifile=","aauto="]) 32 | 33 | except getopt.GetoptError: 34 | print('test.py -i ') 35 | sys.exit(2) 36 | for opt, arg in opts: 37 | if opt == '-h': 38 | print('test.py -i ') 39 | sys.exit() 40 | elif opt in ("-i", "--ifile"): 41 | path = arg 42 | elif opt in ("-a", "--aauto"): 43 | 44 | t_dir = time.strftime("%Y-%m-%d", time.localtime()) 45 | path = "{}/{}".format(config.getpath(),t_dir); 46 | # 键为文件大小, 值为列表(文件路径、md5) 47 | all_size = {} 48 | total_file = 0 49 | total_delete = 0 50 | # 开始时间 51 | start = time.time() 52 | # 遍历文件夹下的所有文件 53 | for file in os.listdir(path): 54 | # 文件数量加 1 55 | total_file += 1 56 | # 文件的路径 57 | real_path = os.path.join(path, file) 58 | # 判断文件是否是文件 59 | if os.path.isfile(real_path) == True: 60 | # 获取文件大小 61 | size = os.stat(real_path).st_size 62 | # md5(默认为空) 63 | size_and_md5 = [""] 64 | # 如果文件大小已存在 65 | if size in all_size.keys(): 66 | # 获取文件的md5码 67 | new_md5 = getmd5(real_path) 68 | # 大小相同,md5 为空,添加md5 69 | if all_size[size][0] == "": 70 | all_size[size][0] = new_md5 71 | # md5 已存在,删除 72 | if new_md5 in all_size[size]: 73 | print('删除', real_path) 74 | os.remove(real_path) 75 | total_delete += 1 76 | else: 77 | # md5 不存在,进行添加 78 | all_size[size].append(new_md5) 79 | else: 80 | # 如果文件大小不存在,则将此文件大小添加到 all_size 字典中 81 | all_size[size] = size_and_md5 82 | # 结束时间 83 | end = time.time() 84 | time_last = end - start 85 | print('文件总数:', total_file) 86 | print('删除个数:', total_delete) 87 | print('耗时:', time_last, '秒') 88 | 89 | 90 | if __name__ == '__main__': 91 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## telethon_get_media 2 | 3 | 4 | ### 分享文件相关 5 | 6 | * 可以通过 https://github.com/uk0/file_encryption 加密后自释放分享 :)Safe 7 | 8 | ### 当前版本 9 | 10 | * beta 1.0.0 11 | 12 | #### 功能 13 | 14 | * 1.下载 Telegram 历史Media [频道id里面有自己改一下] `main.py` 15 | 16 | * 2.实时下载所有频道里面的 Media [自己加入的所有频道] `rt.py` 17 | 18 | * 3.下载获取历史与评论区 `his.py` 自己调用download media即可。 19 | 20 | 21 | #### Env Python 3.6 22 | 23 | * telethon 24 | * socks ` pip install PySocks` 25 | * asyncio 26 | 27 | 28 | #### 说明 29 | 30 | ```bash 31 | 32 | #1 https://my.telegram.org/auth 输入手机号申请 APIID 33 | 34 | #2.直接把自己的API KEY 写入进去 运行程序会让你输入手机号,以及验证码。 35 | 36 | #3.第一次需要输入手机号 停止后在启动不需要了就。 37 | 38 | #4.纯属无聊。。。。。 39 | 40 | ``` 41 | 42 | #### quick start 43 | 44 | * 修改配置文件 45 | 46 | > `DELETE_DUP` 现在没有使用 47 | 48 | > 自己创建一个名字=`zh_cn.config`的文件和python脚本同级将以下内容稍作修改写入即可 49 | 50 | ```config 51 | [message_download] 52 | PIC_DIR=/Users/firshme/Desktop/tmp 53 | DATA_DIR=/Users/firshme/Desktop/tmp 54 | DELETE_DUP=AUTO 55 | API_ID=100851 56 | API_HASH=464f1f154c34c1f93057f3be 57 | TG_AUTH_FILE_NAME=auto_download 58 | PROXY_ADDR=127.0.0.1 59 | PROXY_PORT=1089 60 | ``` 61 | 62 | 63 | * 安装依赖 64 | 65 | ```bash 66 | pip install telethon 67 | pip install PySocks 68 | ``` 69 | 70 | * 启动 71 | 72 | ```bash 73 | sudo -u root /opt/miniconda3/bin/python3 main.py 74 | 75 | ``` 76 | 77 | 78 | 79 | * 启动后 console 80 | 81 | ```bash 82 | (Press Ctrl+C to stop this) 83 | handler init success 84 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/XfBFEXrBc18TJL9XjU4zcI 85 | handler init success 86 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/xIbFtL3zpDjImhujE8IaWX 87 | handler init success 88 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/DdNU5sqUv3B771R1Yr5aZt 89 | handler init success 90 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/UYH1CzsvgTQzyuTB2gjlKt 91 | handler init success 92 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/EcdhKSMTszWYFLtYlMdUGL 93 | handler init success 94 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/vm6Fbx1o1QR3u2VcpTK9HP 95 | handler init success 96 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/Wx1wj1BSmQTkdzne5nVehG 97 | handler init success 98 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/5hyks1pWPE5yt0ACuyyc3g 99 | handler init success 100 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/5NqBudIhSLFeNGHdphxSPj 101 | handler init success 102 | download - /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06/JIDBfFvplFxMA2ruXyaGb5 103 | 104 | 105 | # 查看文件夹 106 | admin@DS918:/volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06$ ls -al 107 | total 84920 108 | drwxr-xr-x 2 root root 4096 Jul 6 23:34 . 109 | drwxr-xr-x 3 admin users 4096 Jul 6 23:26 .. 110 | -rw-r--r-- 1 root root 22599 Jul 6 23:34 FrB1elMKv84c7pGQr7Dkmi.jpg 111 | -rw-r--r-- 1 root root 16908288 Jul 6 23:34 UYH1CzsvgTQzyuTB2gjlKt_QMYxxx1271124695396634624-20200612_005838-vid1.mp4 112 | -rw-r--r-- 1 root root 55574528 Jul 6 23:34 vm6Fbx1o1QR3u2VcpTK9HP_xxxxxxxxxx.mp4 113 | -rw-r--r-- 1 root root 14417920 Jul 6 23:34 Wx1wj1BSmQTkdzne5nVehG_xxxxxxx.mp4 114 | 115 | ``` 116 | 117 | 118 | #### show_groupby.sh 使用 119 | 120 | ```bash 121 | sh show_groupby.sh /path/to/dir 122 | 123 | # 例如 124 | admin@DS918:/volume5/green_hdd/pysuper/telethon_get_media$ sh show_groupby.sh /volume5/green_hdd/pysuper/telethon_get_media/data/2022-07-06 125 | 468KiB 11 jpg 126 | 699MiB 7 mp4 127 | ``` 128 | 129 | 130 | 131 | #### happy continue 132 | 133 | * 先给个`✨`直接提问题即可看到就会修改。 134 | 135 | #### tools 136 | 137 | * 里面都是测试写的统计脚本和工具之前在arm里面跑的 后面整合以后在删除吧。 138 | -------------------------------------------------------------------------------- /his.py: -------------------------------------------------------------------------------- 1 | import random 2 | import asyncio 3 | from telethon import TelegramClient, types 4 | from telethon.errors import ChannelPrivateError, UsernameInvalidError, UsernameNotOccupiedError 5 | from telethon.tl.functions.messages import GetHistoryRequest 6 | from telethon.tl.functions.channels import GetFullChannelRequest 7 | from telethon.tl.types import PeerChat, PeerChannel, InputPeerChat 8 | 9 | from logger_config import setup_logger 10 | logger = setup_logger() 11 | # 支持获取评论区的内容。 12 | class TgHistoryClass: 13 | def __init__(self, channel_id, limit, total_count_limit): 14 | # EasyImage 图床平台的 API URL 和 Token 15 | api_id = "xxxxxx" # 替换为你的 API ID 16 | api_hash = "xxxxxx" 17 | username = "xxxx" 18 | 19 | # 创建客户端 20 | self.client = TelegramClient(username, api_id, api_hash) 21 | self.channel_id = channel_id 22 | if isinstance(self.channel_id, int): 23 | # 数字频道 24 | self.my_channel = PeerChannel(channel_id) 25 | self.limit = limit 26 | self.total_count_limit = total_count_limit 27 | 28 | def disconnection(self): 29 | # 这行会阻塞,直到客户端断连并且无法重连,或你主动停止 30 | self.client.disconnect() 31 | 32 | 33 | async def offline_msg_task_test(self): 34 | await self.client.start() 35 | try: 36 | if isinstance(self.channel_id, str): 37 | self.my_channel = await self.client.get_entity(self.channel_id) 38 | except (UsernameInvalidError, UsernameNotOccupiedError) as e: 39 | logger.warning(f"频道 {self.channel_id} 不存在或无效: {e}") 40 | return 41 | except Exception as e: 42 | logger.error(f"获取频道 {self.channel_id} 时出错: {e}") 43 | return 44 | 45 | # 1. 拉频道历史消息 46 | all_messages = [] 47 | offset_id = 0 48 | while True: 49 | try: 50 | history = await self.client(GetHistoryRequest( 51 | peer=self.my_channel, 52 | offset_id=offset_id, 53 | offset_date=None, 54 | add_offset=0, 55 | limit=self.limit, 56 | max_id=0, 57 | min_id=0, 58 | hash=0 59 | )) 60 | await asyncio.sleep(random.uniform(3.5, 5.0)) 61 | except ChannelPrivateError: 62 | logger.warning(f"频道 {self.channel_id} 是私有的或无访问权限") 63 | break 64 | except ConnectionError as e: 65 | logger.warning(f"连接错误: {e},等待 10 秒后重试") 66 | await asyncio.sleep(10) 67 | continue 68 | except Exception as e: 69 | logger.error(f"获取频道 {self.channel_id} 历史消息时出错: {e}") 70 | break 71 | 72 | if not history.messages: 73 | break 74 | 75 | all_messages.extend(history.messages) 76 | offset_id = history.messages[-1].id 77 | if self.total_count_limit and len(all_messages) >= self.total_count_limit: 78 | break 79 | 80 | logger.info(f"从频道 {self.channel_id} 获取了 {len(all_messages)} 条消息") 81 | 82 | # 2. 获取讨论组实体 83 | discussion_entity = None 84 | try: 85 | full = await self.client(GetFullChannelRequest(channel=self.my_channel)) 86 | linked_id = getattr(full.full_chat, 'linked_chat_id', None) 87 | if linked_id: 88 | discussion_entity = await self.client.get_entity(PeerChannel(linked_id)) 89 | logger.info(f"讨论组实体已获取: id={discussion_entity.id}, type={type(discussion_entity)}") 90 | else: 91 | logger.info(f"频道 {self.channel_id} 未开启评论功能(无讨论组)。") 92 | except Exception as e: 93 | logger.warning(f"获取频道讨论组时出错: {e}") 94 | 95 | if discussion_entity: 96 | logger.info("开始扫描讨论组消息,建立转发帖映射...") 97 | async for dmsg in self.client.iter_messages(discussion_entity, limit=120): # limit可调整 98 | if dmsg.media: 99 | all_messages.append(dmsg) 100 | # 打印并下载原始消息及其评论 101 | try: 102 | for msg in all_messages: 103 | if msg.media: 104 | await self.client.download_media(msg, file='./downloads/') 105 | logger.info(f"下载消息 {msg.id} 的 media: {msg.media}") 106 | print(f"【消息 {msg.id}】: {msg}") 107 | except Exception as e: 108 | logger.error(f"处理评论区时出错: {e}") 109 | 110 | await self.client.disconnect() 111 | logger.info("客户端已断开连接") 112 | if __name__ == '__main__': 113 | # 示例用法 114 | channel_id = 'hao123' # 替换为你的频道ID 115 | limit = 100 # 每次获取的消息数量 116 | total_count_limit = 2 # 0 表示不限制总数 117 | 118 | tg_history = TgHistoryClass(channel_id, limit, total_count_limit) 119 | 120 | # 使用 asyncio 运行异步任务 121 | loop = asyncio.get_event_loop() 122 | loop.run_until_complete(tg_history.offline_msg_task_test()) 123 | 124 | tg_history.disconnection() 125 | -------------------------------------------------------------------------------- /tools/pycrontab.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os, time, uuid, platform, json, codecs 3 | import logging 4 | from multiprocessing import Process, freeze_support, Manager 5 | from datetime import date, datetime, timedelta 6 | from subprocess import Popen, PIPE 7 | 8 | 9 | __all__ = ['crontab', 'crontab_run'] 10 | 11 | current_path = os.path.dirname(os.path.abspath(__file__)) 12 | 13 | decode = 'gb2312' if platform.system() == 'Windows' else 'utf-8' 14 | 15 | 16 | ######################################################################## 17 | class Job(object): 18 | """""" 19 | 20 | def __init__(self, script,script_param, executor, crontab): 21 | """Constructor""" 22 | self.job_id = uuid.uuid1().hex 23 | self.script = script 24 | self.script_param = script_param 25 | self.executor = executor 26 | self.add_time = datetime.now().replace(microsecond=0) 27 | self.next_time = None 28 | self.log_file = None 29 | self.log_file_timestamp = date.today().strftime('%Y%m%d') 30 | self.log_file_suffix = '-{timestamp}-{sequence}.log' 31 | self.log_file_sequence = 1 32 | self.logger = None 33 | self.status = 1 # -1:结束; 1:运行中 34 | self.method = crontab._method 35 | self.year = crontab._year 36 | self.month = crontab._month 37 | self.day = crontab._day 38 | self.hour = crontab._hour 39 | self.minute = crontab._minute 40 | self.second = crontab._second 41 | self.granula = crontab._granula 42 | self.begin_time = crontab._begin_time 43 | self.end_time = crontab._end_time 44 | self.gen_next_time() 45 | self.log() 46 | 47 | def gen_next_time(self, init=True): 48 | """""" 49 | if self.method == 'fix-all': 50 | self.next_time = datetime(year=self.year, 51 | month=self.month, 52 | day=self.day, 53 | hour=self.hour, 54 | minute=self.minute, 55 | second=self.second) 56 | self.status = -1 57 | elif self.method == 'fix-part': 58 | now = datetime.now().replace(microsecond=0) 59 | if not self.next_time: 60 | if self.begin_time: 61 | self.next_time = self.begin_time 62 | else: 63 | self.next_time = self.add_time 64 | 65 | if self.granula == 'year': 66 | self.next_time = self.next_time.replace(month=self.month, 67 | day=self.day, hour=self.hour, minute=self.minute, second=self.second) 68 | # 防止第一次计算next_time跳过当年的执行时间 69 | # 比如begin_time='2018-06-01 00:00:00', 如果指定每年7月1日执行一次作业, 70 | # 此时以下条件限制就可以防止添加作业后第一次的next_time跳过当年的7月 71 | if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now: 72 | self.next_time = self.next_time.replace(year=self.next_time.year + 1) 73 | elif self.granula == 'month': 74 | self.next_time = self.next_time.replace(hour=self.hour, minute=self.minute, second=self.second) 75 | if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now: 76 | if self.next_time.month == 12: 77 | self.next_time = self.next_time.replace(year=self.next_time.year + 1, month=1) 78 | else: 79 | self.next_time = self.next_time.replace(month=self.next_time.month + 1) 80 | 81 | if self.day > 0: 82 | self.next_time = self.next_time.replace(day=self.day) 83 | else: 84 | import calendar 85 | days = calendar.monthrange(self.next_time.year, self.next_time.month)[1] 86 | self.next_time = self.next_time.replace(day=days + self.day) 87 | 88 | elif self.granula == 'day': 89 | self.next_time = self.next_time.replace(hour=self.hour, minute=self.minute, second=self.second) 90 | if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now: 91 | self.next_time += timedelta(days=1) 92 | elif self.granula == 'hour': 93 | self.next_time = self.next_time.replace(minute=self.minute, second=self.second) 94 | if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now: 95 | self.next_time += timedelta(hours=1) 96 | elif self.granula == 'minute': 97 | self.next_time = self.next_time.replace(second=self.second) 98 | if (self.begin_time and self.next_time < self.begin_time) or init==False or self.next_time < now: 99 | self.next_time += timedelta(minutes=1) 100 | 101 | if self.end_time and self.next_time > self.end_time: 102 | self.status = -1 103 | elif self.next_time <= datetime.now(): 104 | self.gen_next_time() 105 | 106 | else: # interval 107 | if not self.next_time: 108 | if self.begin_time: 109 | self.next_time = self.begin_time 110 | else: 111 | self.next_time = self.add_time 112 | else: 113 | if self.year: 114 | self.next_time = self.next_time.replace(year=self.next_time.year+1) 115 | elif self.month: 116 | if self.next_time.month == 12: 117 | self.next_time = self.next_time.replace(year=self.next_time.year+1, month=1) 118 | else: 119 | self.next_time = self.next_time.replace(month=self.next_time.month+1) 120 | elif self.day: 121 | self.next_time += timedelta(days=self.day) 122 | elif self.hour: 123 | self.next_time += timedelta(hours=self.hour) 124 | elif self.minute: 125 | self.next_time += timedelta(minutes=self.minute) 126 | elif self.second: 127 | self.next_time += timedelta(seconds=self.second) 128 | 129 | 130 | if self.end_time and self.next_time > self.end_time: 131 | self.status = -1 132 | elif self.next_time <= datetime.now(): 133 | self.gen_next_time() 134 | 135 | 136 | def gen_log_sequence(self): 137 | # 计算日志大小 138 | log_file = self.log_file.format(timestamp=self.log_file_timestamp, sequence=self.log_file_sequence) 139 | 140 | if not os.path.exists(self.log_file): 141 | self.log_file_sequence = 1 142 | else: 143 | if os.path.getsize(log_file) > self.log_size * 1024 * 1024: 144 | self.log_file_sequence += 1 145 | 146 | def log(self, path=None, prefix=None, size=None): 147 | """""" 148 | if path: 149 | self.log_path = path 150 | else: 151 | self.log_path = os.path.join(current_path, 'log') 152 | if not os.path.exists(self.log_path): 153 | os.mkdir(self.log_path) 154 | 155 | if prefix: 156 | self.log_file = os.path.join(self.log_path, str(prefix) + self.log_file_suffix) 157 | else: 158 | self.log_file = os.path.join(self.log_path, 159 | os.path.splitext(os.path.basename(self.script))[0] + self.log_file_suffix) 160 | 161 | if size: 162 | self.log_size = size 163 | else: 164 | self.log_size = 10 165 | 166 | def _logger(self, debug=False): 167 | """""" 168 | log_file = self.log_file.format(timestamp=self.log_file_timestamp, sequence=self.log_file_sequence) 169 | 170 | logger = logging.getLogger(log_file) 171 | logger.setLevel(logging.DEBUG) 172 | if not logger.handlers: 173 | filehandler = logging.FileHandler(log_file, encoding='utf-8') 174 | filehandler.setLevel(logging.DEBUG) 175 | 176 | consolehandler = logging.StreamHandler() 177 | consolehandler.setLevel(logging.DEBUG if debug else logging.ERROR) 178 | 179 | formatter = logging.Formatter("%(asctime)s - %(filename)s - %(levelname)s - %(message)s") 180 | 181 | filehandler.setFormatter(formatter) 182 | consolehandler.setFormatter(formatter) 183 | 184 | logger.addHandler(filehandler) 185 | logger.addHandler(consolehandler) 186 | 187 | return logger 188 | 189 | def run(self): 190 | """""" 191 | self.logger = self._logger() 192 | self.logger.info('start running script: {} params : {}'.format(self.script,self.script_param)) 193 | try: 194 | cmd = '{} {} {}'.format(self.executor, self.script,self.script_param) 195 | p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) 196 | out, err = p.communicate() 197 | if err or p.returncode != 0: 198 | self.logger.error( 199 | "The command finished with error: \n" 200 | + err.decode(decode).replace('\r', '').rstrip('\n') 201 | ) 202 | else: 203 | self.logger.info( 204 | "The stdout of the command: " 205 | + out.decode(decode).replace('\r', '').rstrip('\n') 206 | ) 207 | except Exception as e: 208 | self.logger.error( 209 | "The command finished with error: " + e.args[0] + e.args[1] 210 | ) 211 | finally: 212 | self.logger.info('finish running script: {}'.format(self.script)) 213 | 214 | def __lt__(self, other): 215 | """""" 216 | return self.next_time < other.next_time 217 | 218 | def __str__(self): 219 | return '' % (self.script, self.method, self.next_time, self.status) 220 | 221 | 222 | ######################################################################## 223 | class Crontab(object): 224 | """""" 225 | _jobs = [] 226 | job_config_file = os.path.join(current_path, 'jobs.conf') 227 | def __init__(self): 228 | """Constructor""" 229 | self._method = '' 230 | self._year = None 231 | self._month = None 232 | self._day = None 233 | self._hour = None 234 | self._minute = None 235 | self._second = None 236 | self._begin_time = None 237 | self._end_time = None 238 | self._interval = None 239 | self._granula = None 240 | self._granulalist = ['year', 'month', 'day', 'hour', 'minute', 'second'] 241 | 242 | def every(self, granula='day'): 243 | if granula not in self._granulalist: 244 | raise Exception("granula必须在{}中".format(','.join(self._granulalist))) 245 | 246 | self._granula = granula 247 | return self 248 | 249 | def at(self, **kwargs): 250 | """定时间点""" 251 | if self._method: 252 | raise Exception("不可重用interval和at方法.") 253 | 254 | if not self._granula: 255 | assert len(kwargs) == 6 256 | for k in kwargs: 257 | if k not in self._granulalist: 258 | raise Exception("{}必须在{}中".format(k,','.join(self._granulalist))) 259 | setattr(self, '_' + k, kwargs[k]) 260 | assert all([12 >= self._month >= 1, 261 | 31 >= self._day >= 1, 262 | 23 >= self._hour >= 0, 263 | 59 >= self._minute >= 0, 264 | 59 >= self._second >= 0]) 265 | self._method = 'fix-all' 266 | return self 267 | 268 | self._method = 'fix-part' 269 | 270 | if self._granula == 'year': 271 | self._month = kwargs.get('month', 1) 272 | self._day = kwargs.get('day', 1) 273 | self._hour = kwargs.get('hour', 0) 274 | self._minute = kwargs.get('minute', 0) 275 | self._second = kwargs.get('second', 0) 276 | assert all([12 >= self._month >= 1, 277 | 31 >= self._day >= -5 and self._day != 0, 278 | 23 >= self._hour >= 0, 279 | 59 >= self._minute >= 0, 280 | 59 >= self._second >= 0]) 281 | 282 | elif self._granula == 'month': 283 | self._day = kwargs.get('day', 1) 284 | self._hour = kwargs.get('hour', 0) 285 | self._minute = kwargs.get('minute', 0) 286 | self._second = kwargs.get('second', 0) 287 | assert all([31 >= self._day >= -5 and self._day != 0, 288 | 23 >= self._hour >= 0, 289 | 59 >= self._minute >= 0, 290 | 59 >= self._second >= 0]) 291 | 292 | elif self._granula == 'day': 293 | self._hour = kwargs.get('hour', 0) 294 | self._minute = kwargs.get('minute', 0) 295 | self._second = kwargs.get('second', 0) 296 | assert all([23 >= self._hour >= 0, 297 | 59 >= self._minute >= 0, 298 | 59 >= self._second >= 0]) 299 | 300 | elif self._granula == 'hour': 301 | self._minute = kwargs.get('minute', 0) 302 | self._second = kwargs.get('second', 0) 303 | assert all([59 >= self._minute >= 0, 304 | 59 >= self._second >= 0]) 305 | 306 | elif self._granula == 'minute': 307 | self._second = kwargs.get('second', 0) 308 | assert all([59 >= self._second >= 0]) 309 | 310 | elif self._granulalist == 'second': 311 | raise Exception("every('second')时不支持at,可使用interval!") 312 | 313 | return self 314 | 315 | def interval(self, num): 316 | """定间隔""" 317 | if self._method: 318 | raise Exception("不可重用interval和at方法.") 319 | if not self._granula: 320 | raise Exception("必须先使用every方法指定频率粒度") 321 | if not isinstance(num, int) or num < 0: 322 | raise Exception("参数num必须为大于0的整数") 323 | self._method = 'interval' 324 | setattr(self, '_' + self._granula, num) 325 | return self 326 | 327 | def begin(self, dtime): 328 | """开始时间,精确到秒""" 329 | if not isinstance(dtime, datetime): 330 | raise Exception("dtime参数必须为datetime类型") 331 | self._begin_time = dtime.replace(microsecond=0) 332 | return self 333 | 334 | def end(self, dtime): 335 | """结束时间,精确到秒""" 336 | if not isinstance(dtime, datetime): 337 | raise Exception("btime参数必须为datetime类型") 338 | self._end_time = dtime.replace(microsecond=0) 339 | return self 340 | 341 | def execute(self, script,script_param, executor='python'): 342 | if not os.path.exists(script): 343 | raise Exception("未找到该脚本:{}".format(script)) 344 | if os.path.splitext(script)[1].lower() != '.py' and executor.lower() =='python': 345 | raise Exception("必须提供正确的执行程序,如python, java, bash等") 346 | j = Job(script,script_param, executor, self) 347 | self._jobs.append(j) 348 | self.__init__() 349 | 350 | def __getstate__(self): 351 | return self._jobs 352 | 353 | def __setstate__(self, state): 354 | self._jobs = state 355 | 356 | 357 | def flushJobs(self, init=False): 358 | if init: 359 | json_jobs = [j.__dict__ for j in self._jobs] 360 | else: 361 | with codecs.open(self.job_config_file, 'r', encoding='utf-8') as f: 362 | json_jobs = f.read() 363 | json_jobs = json.loads(json_jobs) 364 | for jj in json_jobs: 365 | for j in self._jobs: 366 | if jj['job_id'] == j.job_id: 367 | break 368 | jj['status'] = -1 369 | with codecs.open(self.job_config_file, 'w', encoding='utf-8') as f: 370 | json.dump(json_jobs, f, indent=4, ensure_ascii=False, separators=(',', ': '), cls=DateEncoder) 371 | 372 | 373 | def loop(self, queue, debug): 374 | self.flushJobs(init=True) 375 | 376 | self.last_loop_time = datetime.now().replace(microsecond=0) - timedelta(seconds=10) 377 | while True: 378 | now = datetime.now().replace(microsecond=0) 379 | 380 | # 去除已完成的job 381 | pre_job_count = len(self._jobs) 382 | self._jobs = [j for j in self._jobs if j.status == 1] 383 | if pre_job_count != len(self._jobs): 384 | self.flushJobs() 385 | 386 | for j in sorted(self._jobs): 387 | if debug: 388 | j._logger(debug).info("{}".format(str(j))) 389 | 390 | if self.last_loop_time < j.next_time <= now: 391 | if debug: 392 | j._logger(debug).info("put job into queue: {}".format(str(j))) 393 | queue.put(j) 394 | j.gen_next_time(init=False) 395 | j.gen_log_sequence() 396 | elif j.next_time < self.last_loop_time: 397 | j.gen_next_time(init=False) 398 | 399 | self.last_loop_time = now 400 | time.sleep(1) 401 | 402 | 403 | class DateEncoder(json.JSONEncoder): 404 | def default(self, obj): 405 | if isinstance(obj, datetime): 406 | return obj.strftime('%Y-%m-%d %H:%M:%S') 407 | elif obj is None: 408 | return "" 409 | return json.JSONEncoder.default(self, obj) 410 | 411 | def first_runner(queue): 412 | while True: 413 | j = queue.get() 414 | j.run() 415 | 416 | 417 | def second_runner(queue): 418 | while True: 419 | j = queue.get() 420 | j.run() 421 | 422 | 423 | crontab = Crontab() 424 | 425 | def crontab_run(debug=False): 426 | freeze_support() 427 | with Manager() as manager: 428 | queue = manager.Queue() 429 | ps = [ 430 | Process(target=crontab.loop, name="crontab.loop", args=(queue, debug)), 431 | Process(target=first_runner, name="first_runner", args=(queue,)), 432 | Process(target=second_runner, name="second_runner", args=(queue,)) 433 | ] 434 | 435 | for p in ps: 436 | p.daemon = True 437 | p.start() 438 | 439 | while True: 440 | time.sleep(5) 441 | for p in ps: 442 | if not p.is_alive(): 443 | ps.remove(p) 444 | print("terminate: {} {}".format(p.pid, p.name)) 445 | if p.name == 'crontab.loop': 446 | p = Process(target=crontab.loop, name=p.name, args=(queue, debug)) 447 | else: 448 | p = Process(target=globals()[p.name], name=p.name, args=(queue,)) 449 | p.daemon = True 450 | ps.append(p) 451 | p.start() 452 | 453 | 454 | if __name__ == '__main__': 455 | crontab_run() 456 | 457 | --------------------------------------------------------------------------------