# ├── copy.py ├── techoparkjobs.py └── tgcopycli.py

# ======================================================================
# copy.py
# ======================================================================
"""Copy Telegram files between two chats using Pyrogram.

pip3 install pyrogram
Fill in the constants below, run the script, then send /copy in any chat
from the logged-in account to start copying.
"""
from pyrogram import Client, filters
from pyrogram.errors import FloodWait
import asyncio


FROM = -10012345678          # source chat id (placeholder -- fill in)
TO = -10012345678            # target chat id (placeholder -- fill in)
PYRO_SESSION = ""            # Pyrogram session name / string
API_ID = 1234                # from my.telegram.org
API_HASH = "abcd1234edfghijkl"

user = Client(
    PYRO_SESSION,
    api_id=API_ID,
    api_hash=API_HASH,
)


@user.on_message(filters.command('copy') & filters.me)
async def copy_files(c, m):
    """Copy every document message from FROM to TO, honouring flood waits."""
    # NOTE(review): iter_history and FloodWait.x are Pyrogram 1.x APIs;
    # Pyrogram 2.x renamed them to get_chat_history() and FloodWait.value.
    # Kept as-is because the file is written against 1.x throughout.
    async for msg in c.iter_history(FROM):
        try:
            if msg.document:  # change attribute to copy other media types (video|photo|...)
                await msg.copy(TO)
        except FloodWait as t:
            # Telegram asked us to back off for t.x seconds.
            await asyncio.sleep(t.x)

user.run()


# ======================================================================
# techoparkjobs.py
# ======================================================================
import re
import csv
import requests
from bs4 import BeautifulSoup
from bs4.element import ResultSet


class TechnoParkScaper:
    """Scrape job postings from technopark.org into data.csv."""
    # NOTE(review): class name keeps the original "Scaper" spelling to stay
    # backward-compatible with any external callers.

    def __init__(self) -> None:
        # CSV column headers. BUGFIX: "descripton" -> "description".
        self.header = ["title", "posted on", "closing on", "contact",
                       "description", "skills", "company name", "address",
                       "website"]

    def get_soupy_source_code(self, url) -> BeautifulSoup:
        """Fetch *url* and return its parsed HTML."""
        res = requests.get(url)
        return BeautifulSoup(res.text, "html.parser")

    def job_post_list(self) -> ResultSet:
        """Return the <tr class="companyList"> rows of the job-search page."""
        soup = self.get_soupy_source_code("https://www.technopark.org/job-search")
        return soup.find_all("tr", {"class": "companyList"})

    def scrape_job_details(self, post_result_set: ResultSet) -> list:
        """Scrape one job row; return a row matching self.header,
        or None when the post has no detail-page link."""
        title = post_result_set.td.text
        url_path = post_result_set.td.a.get("href")
        cname = post_result_set.select_one("td:nth-of-type(2)").text
        closing_date = post_result_set.select_one("td:nth-of-type(3)").text
        if not url_path:
            return None
        print(f"processing - {title} at {cname}")
        details_soup = self.get_soupy_source_code(f"http://technopark.org{url_path}")

        # BUGFIX: default every optional field so a missing section on the
        # detail page no longer raises NameError at the return statement.
        caddress = cwebsite = posted_date = contact = description = skills = ""

        company_details = details_soup.find("ul", class_="list-sx")
        if company_details is not None:  # BUGFIX: guard against missing list
            for dlist in company_details.find_all("li"):
                if re.search(r"address", dlist.div.text):
                    dlist.find("div").decompose()
                    caddress = dlist.text.strip()
                elif re.search(r"website", dlist.div.text):
                    cwebsite = dlist.a.text

        for detail in details_soup.find_all("div", class_="block"):
            if head := detail.find("p", class_="head"):
                if re.search(r"posted\son.*", head.text, re.IGNORECASE):
                    posted_child = detail.select_one("p:nth-of-type(2)")
                    posted_date = posted_child.text
                elif re.search(r"contact.*", head.text, re.IGNORECASE):
                    contact = detail.a.text
                elif re.search(r".*description.*", head.text, re.IGNORECASE):
                    detail.find("p", class_="head").decompose()
                    description = detail.text.strip()
                elif re.search(r"skills.*", head.text, re.IGNORECASE):
                    detail.find("p", class_="head").decompose()
                    skills = detail.text.strip()
        return [
            title, posted_date, closing_date,
            contact, description, skills, cname,
            caddress, cwebsite
        ]

    def scrape(self):
        """Scrape every job post and write data.csv."""
        # BUGFIX: newline='' is required by the csv module to avoid blank
        # rows on Windows.
        with open('data.csv', 'w', encoding="UTF-8", newline='') as f:
            writer = csv.writer(f)
            writer.writerow(self.header)
            total_job_post_list = self.job_post_list()
            print(f"found {len(total_job_post_list)} job posts")
            for job_post in total_job_post_list:
                job_details = self.scrape_job_details(job_post)
                # BUGFIX: scrape_job_details returns None for posts without
                # a link; writerow(None) would raise csv.Error.
                if job_details:
                    writer.writerow(job_details)


sc = TechnoParkScaper()
sc.scrape()


# ======================================================================
# tgcopycli.py
# ======================================================================
"""
A commandline tool for copying telegram files using pyrogram
pip3 install pyrogram
after logging in use /copy in any chat to initialise the process
"""
import sys
import asyncio
import argparse
from pyrogram import Client, filters
from pyrogram.errors import FloodWait

parser = argparse.ArgumentParser(description='CLI tool for copying files bw two channels')
parser.add_argument(
    "-i", "--api-id", help="API id from my.telegram.org",
    required=True, type=int
)
parser.add_argument(
    "-a", "--api-hash", help="API hash from my.telegram.org",
    required=True
)
parser.add_argument(
    "-s", "--session", help="Pyrogram session string (optional)",
    default="anything"
)
parser.add_argument(
    "-f", "--fromc", help="from chat id",
    required=True, type=int
)
parser.add_argument(
    "-t", "--toc", help="target chat id",
    required=True, type=int
)
parser.add_argument(
    "-l", "--filter", help="type of file which you want to copy",
    default="document"
)
args = parser.parse_args()

user = Client(
    args.session,
    api_id=args.api_id,
    api_hash=args.api_hash
)


@user.on_message(filters.command('copy') & filters.me)
async def copy_files(client, message):
    """Copy all messages matching --filter from --fromc to --toc,
    editing the trigger message with progress every 10 files."""
    await message.edit(f"Trying to copy files from __{args.fromc}__ to __{args.toc}__")
    await asyncio.sleep(2)
    count = 0
    async for msg in client.search_messages(args.fromc, filter=args.filter):
        try:
            await msg.copy(args.toc)
        except FloodWait as wait:
            # Back off for the number of seconds Telegram demands.
            await asyncio.sleep(wait.x)
        except Exception as e:
            # Deliberate hard stop on any other failure.
            sys.exit(e)
        # BUGFIX: increment before the progress check so the first report is
        # "copied 10 files" instead of "copied 0 files".
        count += 1
        if count % 10 == 0:
            await message.edit(f"copied {count} files")
    await message.edit(f"Done, {count} file(s) copied")


# BUGFIX: the guard was misspelled "__main", so user.run() never executed.
if __name__ == "__main__":
    user.run()