├── app
│   ├── __init__.py
│   ├── base.py
│   └── subscene.py
├── .gitignore
├── __init__.py
├── requirements.txt
├── README.md
└── example.py

--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
venv/
.vscode
.idea
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
from app.subscene import Subscene
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiofiles==0.4.0
aiohttp==3.7.4
async-timeout==3.0.1
attrs==19.3.0
autopep8==1.5
beautifulsoup4==4.8.2
chardet==3.0.4
idna==2.9
isort==4.3.21
lxml==4.6.5
mccabe==0.6.1
multidict==4.7.5
pycodestyle==2.5.0
pydocstyle==5.0.2
pyflakes==2.1.1
pylama==7.7.1
snowballstemmer==2.0.0
soupsieve==2.0
yarl==1.4.2
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Fully asynchronous, unofficial API-like scraper for [Subscene.com](https://Subscene.com)
##### It's the scraper used in the [@SubsearchsBot](https://t.me/SubsearchsBot) Telegram bot.

### What can this script do?
You can use it in your app or website to search for, list and download subtitles.

#### Supported languages:
- Persian
- English
- Arabic

#### How does it work?
- The requirements are listed in requirements.txt: `pip install -r requirements.txt`
- Note that this script uses asyncio; example.py relies on `asyncio.run()`, so you need Python 3.7 or newer.
- You can find working examples in example.py, and a quick-start sketch right below.
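A minimal quick-start sketch, condensed from example.py (`search()` and `subtitles()` are the real method names; the title is only an illustration):

```python
import asyncio

from app.subscene import Subscene


async def main():
    subscene = Subscene()
    # search() returns a list of dicts: {"name": ..., "link": ..., "count": ...}
    results = await subscene.search("true detective")
    if results:
        # subtitles() lists the releases for one title;
        # "fa", "en" and "ar" filter the language
        subs = await subscene.subtitles(results[0]["link"], "fa")
        print(subs["title"], len(subs["subtitles"]))


if __name__ == "__main__":
    asyncio.run(main())
```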
##### Contact me:
* email: ebrahimisina78@gmail.com
* telegram: [@Thunderstrack](https://t.me/Thunderstrack)
--------------------------------------------------------------------------------
/app/base.py:
--------------------------------------------------------------------------------
import asyncio
import os

import aiofiles
import aiohttp

if not os.path.isdir("downloaded"):
    print(f"downloaded directory does not exist\ncurrent directory: {os.getcwd()}")
    os.mkdir("downloaded")
    print("downloaded directory created!")


class Base:
    """
    Base class for the package.
    All requests and responses are handled here.
    """

    # send a request to subscene and return the response body
    async def request(self, session: aiohttp.ClientSession, url: str):
        resp = await session.request('GET', url=url)
        if resp.status != 200:
            # no need for recursion; a single retry after a short pause
            # is enough when subscene answers "too many requests"
            await asyncio.sleep(3)
            resp = await session.request('GET', url=url)
        return await resp.text()

    async def aiorequest(self, url, lang=None):
        if lang is not None:
            language = await self.get_language_filter(lang)
            lang = f"LanguageFilter={language}"
        else:
            lang = ""
        custom_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                                        '(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
                          'cookie': lang}
        async with aiohttp.ClientSession(headers=custom_headers) as session:
            html = await self.request(session, url)
        return html

    async def get_language_filter(self, lang):
        # subscene's numeric ids for the supported languages
        languages = {"fa": "46", "en": "13", "ar": "4"}
        return languages[lang]

    async def download_file(self, url, file_path):
        async with aiohttp.ClientSession() as session:
            async with session.request('GET', url=url) as resp:
                if resp.status == 200:
                    file_path = f'downloaded/{file_path}.zip'
                    async with aiofiles.open(file_path, mode='wb') as f:
                        await f.write(await resp.read())
                    return file_path
        return None  # the download failed
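For clarity, the language filter above is nothing more than a cookie: `get_language_filter()` maps a language code to Subscene's numeric id, and `aiorequest()` sends it as `LanguageFilter=<id>`. A standalone sketch of the same request, with the ids taken from the code above (the title URL is only an illustration):

```python
import asyncio

import aiohttp

# Subscene language ids, as used by get_language_filter()
LANGUAGE_IDS = {"fa": "46", "en": "13", "ar": "4"}


async def fetch_filtered(url: str, lang: str) -> str:
    # the filter travels as a plain cookie, exactly as aiorequest() builds it
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "cookie": f"LanguageFilter={LANGUAGE_IDS[lang]}",
    }
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            return await resp.text()


# example (illustrative title URL):
# asyncio.run(fetch_filtered("https://subscene.com/subtitles/true-detective-third-season", "fa"))
```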
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
import asyncio

from app.subscene import Subscene

subscene = Subscene()


async def search(title):
    # pass a movie/tv show title and it returns a list of results for the query
    # example:
    # [{'name': 'True Detective - Third Season', 'link': 'https://subscene.com/subtitles/true-detective-third-season', 'count': 263},
    #  {'name': 'True Detective - Second Season', 'link': 'https://subscene.com/subtitles/true-detective-second-season', 'count': 360}]
    result = await subscene.search(title)
    print(result)


async def get_subtitle_list(url, lang):
    # pass a title url from search() to list the subtitles for that title
    # filter the language by passing "fa" for Persian, "en" for English or "ar" for Arabic
    # example result:
    # {'title': 'True Detective - Third Season', 'subtitles': [{'name': 'True.Detective.Season03.Complete.720p.WEB.H264-METCON',
    #  'link': 'https://subscene.com/subtitles/true-detective-third-season/farsi_persian/1953370',
    #  'owner': 'Arian Drama', 'comments': ''}]}
    result = await subscene.subtitles(url, lang)
    print(result)


async def download_page(url):
    # get the download page data; the url comes from get_subtitle_list()
    # pass the returned download link to down(url) to download the subtitle file
    result = await subscene.down_page(url)
    print(result)


async def down(url):
    # the file name can be extracted from the url or from the download page data;
    # the file is saved as downloaded/subtitle1234.zip (only the 'subtitle1234' part is needed)
    file_name = "subtitle1234"
    resp = await subscene.download(url, file_name)
    # print the path where the file was saved
    print(resp)


if __name__ == "__main__":
    # search for a title:
    asyncio.run(search("true detective"))

    # list the subtitles of one title with a language filter:
    # asyncio.run(get_subtitle_list("https://subscene.com/subtitles/true-detective-third-season", "fa"))

    # get the download page:
    # asyncio.run(download_page("https://subscene.com/subtitles/true-detective-third-season/farsi_persian/1916576"))

    # download the file:
    # asyncio.run(down("download link goes here"))
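Every helper in example.py is a coroutine, so independent lookups need not run one `asyncio.run()` at a time. A hedged sketch of batching searches with `asyncio.gather`, assuming the same `Subscene` class (the titles are only illustrations):

```python
import asyncio

from app.subscene import Subscene

subscene = Subscene()


async def search_many(titles):
    # run several searches concurrently instead of one after another
    results = await asyncio.gather(*(subscene.search(t) for t in titles))
    return dict(zip(titles, results))


if __name__ == "__main__":
    found = asyncio.run(search_many(["true detective", "chernobyl"]))
    for title, matches in found.items():
        print(title, "->", len(matches), "results")
```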
--------------------------------------------------------------------------------
/app/subscene.py:
--------------------------------------------------------------------------------
import re
from urllib.parse import quote_plus

from bs4 import BeautifulSoup

from .base import Base


class Subscene(Base):
    async def search(self, title):
        """
        Search subscene for a given title.
        Results are returned as a list of dicts, e.g.:
        {"name": "movie/series title", "link": "link to that title", "count": subtitle count}
        """
        try:
            # encode the query before building the url (the original replaced
            # spaces only after the request had already been sent)
            url = f"https://subscene.com/subtitles/searchbytitle?query={quote_plus(title)}"
            resp = await self.aiorequest(url=url)  # send the request to subscene
            soup = BeautifulSoup(resp, 'lxml')
            find_ul = soup.find('div', class_='search-result').find_all('ul')

            subtitles = []  # every found title goes here
            for ul in find_ul:
                for li in ul.find_all('li'):
                    base_li = li.find('div', class_='title').a
                    name = base_li.text  # movie/show name
                    link = "https://subscene.com" + base_li['href']  # movie/show url
                    try:  # movie/show subtitle count
                        sub_count = li.find('div', class_='subtle count').text
                    except AttributeError:
                        sub_count = li.find('span', class_='subtle count').text
                    sub_count = re.findall(r'\d+', sub_count)[0]

                    subtitles.append({"name": name, "link": link, "count": int(sub_count)})

            return subtitles
        except Exception as e:
            print(e)
            return []

    async def subtitles(self, url, lang=None):
        try:
            resp = await self.aiorequest(url, lang)
            soup = BeautifulSoup(resp, 'lxml')
            title = (soup.find('div', class_='box clearfix')
                         .find('div', class_='top left')
                         .find('div', class_='header').h2.text)
            # strip the "Flag"/"Imdb" labels that leak into the header text
            title = title.replace("Flag", "").replace("Imdb", "").strip()

            subtitles = []
            for tr in soup.table.tbody.find_all('tr'):
                try:
                    sub_name = tr.find('td', class_='a1').a.find_all('span')[1].text.strip()  # release title
                except AttributeError:
                    continue
                sub_link = "https://subscene.com" + tr.find('td', class_='a1').a['href']  # release link
                try:
                    sub_owner = tr.find('td', class_='a5').a.text.strip()  # uploader
                except AttributeError:
                    sub_owner = "Anonymous"
                try:
                    comments = tr.find('td', class_='a6').text.strip()
                except AttributeError:
                    comments = ""

                subtitles.append({"name": sub_name, "link": sub_link,
                                  "owner": sub_owner, "comments": comments})

            return {"title": title, "subtitles": subtitles}
        except Exception as e:
            print(e)
            return {}  # empty dict, so the failure value matches the success type

    async def down_page(self, url):
        resp = await self.aiorequest(url)
        soup = BeautifulSoup(resp, 'lxml')

        maindiv = soup.body.find('div', class_='subtitle').find('div', class_='top left')
        header = maindiv.find('div', class_='header')
        title = header.h1.span.text.strip()
        try:
            imdb = header.h1.a['href']
        except TypeError:
            imdb = ""
        try:
            poster = maindiv.find('div', class_='poster').a['href']
        except AttributeError:
            poster = ""
        try:
            author = header.ul.find('li', class_='author').a
            author_name = author.text.strip()
            author_link = f"https://subscene.com{author['href']}"
        except AttributeError:
            author_name = "Anonymous"
            author_link = ""

        download_url = f"https://subscene.com{header.ul.find('li', class_='clearfix').find('div', class_='download').a['href']}"

        try:
            comments = header.ul.find('li', class_='comment-wrapper').find('div', class_='comment').text
        except AttributeError:
            comments = ""
        try:
            release_divs = header.ul.find('li', class_='release').find_all('div')
            releases = ""
            for div in release_divs[:2]:  # the first two divs hold the release names
                releases += f"\n{div.text.strip()}"
        except AttributeError:
            releases = ""

        return {"title": title, "imdb": imdb, "poster": poster, "author_name": author_name,
                "author_url": author_link, "download_url": download_url, "comments": comments,
                "releases": releases}

    async def download(self, url, file_path):
        # delegate to Base.download_file, which saves the zip under downloaded/
        return await self.download_file(url, file_path)
--------------------------------------------------------------------------------
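As a final note on the scraping itself: `Subscene.search` walks `div.search-result → ul → li`, reads the anchor inside `div.title`, and pulls the digits out of the `subtle count` element. A self-contained sketch of that logic run against a made-up HTML snippet (not a real Subscene response; the real page may differ):

```python
import re

from bs4 import BeautifulSoup

# made-up sample shaped like the subscene search-result markup
SAMPLE_HTML = """
<div class="search-result">
  <ul>
    <li>
      <div class="title"><a href="/subtitles/true-detective-third-season">True Detective - Third Season</a></div>
      <span class="subtle count">263 subtitles</span>
    </li>
  </ul>
</div>
"""

soup = BeautifulSoup(SAMPLE_HTML, "lxml")
for li in soup.find("div", class_="search-result").find_all("li"):
    anchor = li.find("div", class_="title").a
    count = re.findall(r"\d+", li.find("span", class_="subtle count").text)[0]
    print(anchor.text, "https://subscene.com" + anchor["href"], int(count))
```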