# ├── copy.py ├── techoparkjobs.py └── tgcopycli.py

# ======================================================================
# copy.py
# ======================================================================
"""Copy Telegram files between two chats using Pyrogram.

pip3 install pyrogram
Fill in the constants below, run the script, then send /copy in any chat
from the logged-in account to start copying.
"""
from pyrogram import Client, filters
from pyrogram.errors import FloodWait
import asyncio


FROM = -10012345678          # source chat id (placeholder -- fill in)
TO = -10012345678            # target chat id (placeholder -- fill in)
PYRO_SESSION = ""            # Pyrogram session name / string
API_ID = 1234                # from my.telegram.org
API_HASH = "abcd1234edfghijkl"

user = Client(
    PYRO_SESSION,
    api_id=API_ID,
    api_hash=API_HASH,
)


@user.on_message(filters.command('copy') & filters.me)
async def copy_files(c, m):
    """Copy every document message from FROM to TO, honouring flood waits."""
    # NOTE(review): iter_history and FloodWait.x are Pyrogram 1.x APIs;
    # Pyrogram 2.x renamed them to get_chat_history() and FloodWait.value.
    # Kept as-is because the file is written against 1.x throughout.
    async for msg in c.iter_history(FROM):
        try:
            if msg.document:  # change attribute to copy other media types (video|photo|...)
                await msg.copy(TO)
        except FloodWait as t:
            # Telegram asked us to back off for t.x seconds.
            await asyncio.sleep(t.x)

user.run()


# ======================================================================
# techoparkjobs.py
# ======================================================================
import re
import csv
import requests
from bs4 import BeautifulSoup
from bs4.element import ResultSet


class TechnoParkScaper:
    """Scrape job postings from technopark.org into data.csv."""
    # NOTE(review): class name keeps the original "Scaper" spelling to stay
    # backward-compatible with any external callers.

    def __init__(self) -> None:
        # CSV column headers. BUGFIX: "descripton" -> "description".
        self.header = ["title", "posted on", "closing on", "contact",
                       "description", "skills", "company name", "address",
                       "website"]

    def get_soupy_source_code(self, url) -> BeautifulSoup:
        """Fetch *url* and return its parsed HTML."""
        res = requests.get(url)
        return BeautifulSoup(res.text, "html.parser")

    def job_post_list(self) -> ResultSet:
        """Return the <tr class="companyList"> rows of the job-search page."""
        soup = self.get_soupy_source_code("https://www.technopark.org/job-search")
        return soup.find_all("tr", {"class": "companyList"})

    def scrape_job_details(self, post_result_set: ResultSet) -> list:
        """Scrape one job row; return a row matching self.header,
        or None when the post has no detail-page link."""
        title = post_result_set.td.text
        url_path = post_result_set.td.a.get("href")
        cname = post_result_set.select_one("td:nth-of-type(2)").text
        closing_date = post_result_set.select_one("td:nth-of-type(3)").text
        if not url_path:
            return None
        print(f"processing - {title} at {cname}")
        details_soup = self.get_soupy_source_code(f"http://technopark.org{url_path}")

        # BUGFIX: default every optional field so a missing section on the
        # detail page no longer raises NameError at the return statement.
        caddress = cwebsite = posted_date = contact = description = skills = ""

        company_details = details_soup.find("ul", class_="list-sx")
        if company_details is not None:  # BUGFIX: guard against missing list
            for dlist in company_details.find_all("li"):
                if re.search(r"address", dlist.div.text):
                    dlist.find("div").decompose()
                    caddress = dlist.text.strip()
                elif re.search(r"website", dlist.div.text):
                    cwebsite = dlist.a.text

        for detail in details_soup.find_all("div", class_="block"):
            if head := detail.find("p", class_="head"):
                if re.search(r"posted\son.*", head.text, re.IGNORECASE):
                    posted_child = detail.select_one("p:nth-of-type(2)")
                    posted_date = posted_child.text
                elif re.search(r"contact.*", head.text, re.IGNORECASE):
                    contact = detail.a.text
                elif re.search(r".*description.*", head.text, re.IGNORECASE):
                    detail.find("p", class_="head").decompose()
                    description = detail.text.strip()
                elif re.search(r"skills.*", head.text, re.IGNORECASE):
                    detail.find("p", class_="head").decompose()
                    skills = detail.text.strip()
        return [
            title, posted_date, closing_date,
            contact, description, skills, cname,
            caddress, cwebsite
        ]

    def scrape(self):
        """Scrape every job post and write data.csv."""
        # BUGFIX: newline='' is required by the csv module to avoid blank
        # rows on Windows.
        with open('data.csv', 'w', encoding="UTF-8", newline='') as f:
            writer = csv.writer(f)
            writer.writerow(self.header)
            total_job_post_list = self.job_post_list()
            print(f"found {len(total_job_post_list)} job posts")
            for job_post in total_job_post_list:
                job_details = self.scrape_job_details(job_post)
                # BUGFIX: scrape_job_details returns None for posts without
                # a link; writerow(None) would raise csv.Error.
                if job_details:
                    writer.writerow(job_details)


sc = TechnoParkScaper()
sc.scrape()


# ======================================================================
# tgcopycli.py
# ======================================================================
"""
A commandline tool for copying telegram files using pyrogram
pip3 install pyrogram
after logging in use /copy in any chat to initialise the process
"""
import sys
import asyncio
import argparse
from pyrogram import Client, filters
from pyrogram.errors import FloodWait

parser = argparse.ArgumentParser(description='CLI tool for copying files bw two channels')
parser.add_argument(
    "-i", "--api-id", help="API id from my.telegram.org",
    required=True, type=int
)
parser.add_argument(
    "-a", "--api-hash", help="API hash from my.telegram.org",
    required=True
)
parser.add_argument(
    "-s", "--session", help="Pyrogram session string (optional)",
    default="anything"
)
parser.add_argument(
    "-f", "--fromc", help="from chat id",
    required=True, type=int
)
parser.add_argument(
    "-t", "--toc", help="target chat id",
    required=True, type=int
)
parser.add_argument(
    "-l", "--filter", help="type of file which you want to copy",
    default="document"
)
args = parser.parse_args()

user = Client(
    args.session,
    api_id=args.api_id,
    api_hash=args.api_hash
)


@user.on_message(filters.command('copy') & filters.me)
async def copy_files(client, message):
    """Copy all messages matching --filter from --fromc to --toc,
    editing the trigger message with progress every 10 files."""
    await message.edit(f"Trying to copy files from __{args.fromc}__ to __{args.toc}__")
    await asyncio.sleep(2)
    count = 0
    async for msg in client.search_messages(args.fromc, filter=args.filter):
        try:
            await msg.copy(args.toc)
        except FloodWait as wait:
            # Back off for the number of seconds Telegram demands.
            await asyncio.sleep(wait.x)
        except Exception as e:
            # Deliberate hard stop on any other failure.
            sys.exit(e)
        # BUGFIX: increment before the progress check so the first report is
        # "copied 10 files" instead of "copied 0 files".
        count += 1
        if count % 10 == 0:
            await message.edit(f"copied {count} files")
    await message.edit(f"Done, {count} file(s) copied")


# BUGFIX: the guard was misspelled "__main", so user.run() never executed.
if __name__ == "__main__":
    user.run()