├── app
│   ├── __init__.py
│   ├── base.py
│   └── subscene.py
├── .gitignore
├── __init__.py
├── requirements.txt
├── README.md
└── example.py

--------------------------------------------------------------------------------
/app/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
venv/
.vscode
.idea
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
from app.subscene import Subscene
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
aiofiles==0.4.0
aiohttp==3.7.4
async-timeout==3.0.1
attrs==19.3.0
autopep8==1.5
beautifulsoup4==4.8.2
chardet==3.0.4
idna==2.9
isort==4.3.21
lxml==4.6.5
mccabe==0.6.1
multidict==4.7.5
pycodestyle==2.5.0
pydocstyle==5.0.2
pyflakes==2.1.1
pylama==7.7.1
snowballstemmer==2.0.0
soupsieve==2.0
yarl==1.4.2
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Fully asynchronous, unofficial API-like scraper for [Subscene.com](https://Subscene.com)
##### It's the scraper used in the [@SubsearchsBot](https://t.me/SubsearchsBot) Telegram bot.

### What can this script do?
You can use it in your app or website to search for, list and download subtitles.

#### Supported languages:
- Persian
- English
- Arabic

#### How does it work?
- The requirements are listed in requirements.txt: `pip install -r requirements.txt`
- Note that this script uses asyncio; example.py relies on `asyncio.run()`, so you need Python 3.7 or newer.
- You can find working examples in example.py, and a quick-start sketch right below.
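A minimal quick-start sketch, condensed from example.py (`search()` and `subtitles()` are the real method names; the title is only an illustration):

```python
import asyncio

from app.subscene import Subscene


async def main():
    subscene = Subscene()
    # search() returns a list of dicts: {"name": ..., "link": ..., "count": ...}
    results = await subscene.search("true detective")
    if results:
        # subtitles() lists the releases for one title;
        # "fa", "en" and "ar" filter the language
        subs = await subscene.subtitles(results[0]["link"], "fa")
        print(subs["title"], len(subs["subtitles"]))


if __name__ == "__main__":
    asyncio.run(main())
```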
##### Contact me:
* email: ebrahimisina78@gmail.com
* telegram: [@Thunderstrack](https://t.me/Thunderstrack)
--------------------------------------------------------------------------------
/app/base.py:
--------------------------------------------------------------------------------
import asyncio
import os

import aiofiles
import aiohttp

if not os.path.isdir("downloaded"):
    print(f"downloaded directory does not exist\ncurrent directory: {os.getcwd()}")
    os.mkdir("downloaded")
    print("downloaded directory created!")


class Base:
    """
    Base class for the package.
    All requests and responses are handled here.
    """

    # send a request to subscene and return the response body
    async def request(self, session: aiohttp.ClientSession, url: str):
        resp = await session.request('GET', url=url)
        if resp.status != 200:
            # no need for recursion; a single retry after a short pause
            # is enough when subscene answers "too many requests"
            await asyncio.sleep(3)
            resp = await session.request('GET', url=url)
        return await resp.text()

    async def aiorequest(self, url, lang=None):
        if lang is not None:
            language = await self.get_language_filter(lang)
            lang = f"LanguageFilter={language}"
        else:
            lang = ""
        custom_headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                                        '(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36',
                          'cookie': lang}
        async with aiohttp.ClientSession(headers=custom_headers) as session:
            html = await self.request(session, url)
        return html

    async def get_language_filter(self, lang):
        # subscene's numeric ids for the supported languages
        languages = {"fa": "46", "en": "13", "ar": "4"}
        return languages[lang]

    async def download_file(self, url, file_path):
        async with aiohttp.ClientSession() as session:
            async with session.request('GET', url=url) as resp:
                if resp.status == 200:
                    file_path = f'downloaded/{file_path}.zip'
                    async with aiofiles.open(file_path, mode='wb') as f:
                        await f.write(await resp.read())
                    return file_path
        return None  # the download failed
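For clarity, the language filter above is nothing more than a cookie: `get_language_filter()` maps a language code to Subscene's numeric id, and `aiorequest()` sends it as `LanguageFilter=<id>`. A standalone sketch of the same request, with the ids taken from the code above (the title URL is only an illustration):

```python
import asyncio

import aiohttp

# Subscene language ids, as used by get_language_filter()
LANGUAGE_IDS = {"fa": "46", "en": "13", "ar": "4"}


async def fetch_filtered(url: str, lang: str) -> str:
    # the filter travels as a plain cookie, exactly as aiorequest() builds it
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "cookie": f"LanguageFilter={LANGUAGE_IDS[lang]}",
    }
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url) as resp:
            return await resp.text()


# example (illustrative title URL):
# asyncio.run(fetch_filtered("https://subscene.com/subtitles/true-detective-third-season", "fa"))
```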
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
import asyncio

from app.subscene import Subscene

subscene = Subscene()


async def search(title):
    # pass a movie/tv show title and it returns a list of results for the query
    # example:
    # [{'name': 'True Detective - Third Season', 'link': 'https://subscene.com/subtitles/true-detective-third-season', 'count': 263},
    #  {'name': 'True Detective - Second Season', 'link': 'https://subscene.com/subtitles/true-detective-second-season', 'count': 360}]
    result = await subscene.search(title)
    print(result)


async def get_subtitle_list(url, lang):
    # pass a title url from search() to list the subtitles for that title
    # filter the language by passing "fa" for Persian, "en" for English or "ar" for Arabic
    # example result:
    # {'title': 'True Detective - Third Season', 'subtitles': [{'name': 'True.Detective.Season03.Complete.720p.WEB.H264-METCON',
    #  'link': 'https://subscene.com/subtitles/true-detective-third-season/farsi_persian/1953370',
    #  'owner': 'Arian Drama', 'comments': ''}]}
    result = await subscene.subtitles(url, lang)
    print(result)


async def download_page(url):
    # get the download page data; the url comes from get_subtitle_list()
    # pass the returned download link to down(url) to download the subtitle file
    result = await subscene.down_page(url)
    print(result)


async def down(url):
    # the file name can be extracted from the url or from the download page data;
    # the file is saved as downloaded/subtitle1234.zip (only the 'subtitle1234' part is needed)
    file_name = "subtitle1234"
    resp = await subscene.download(url, file_name)
    # print the path where the file was saved
    print(resp)


if __name__ == "__main__":
    # search for a title:
    asyncio.run(search("true detective"))

    # list the subtitles of one title with a language filter:
    # asyncio.run(get_subtitle_list("https://subscene.com/subtitles/true-detective-third-season", "fa"))

    # get the download page:
    # asyncio.run(download_page("https://subscene.com/subtitles/true-detective-third-season/farsi_persian/1916576"))

    # download the file:
    # asyncio.run(down("download link goes here"))
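Every helper in example.py is a coroutine, so independent lookups need not run one `asyncio.run()` at a time. A hedged sketch of batching searches with `asyncio.gather`, assuming the same `Subscene` class (the titles are only illustrations):

```python
import asyncio

from app.subscene import Subscene

subscene = Subscene()


async def search_many(titles):
    # run several searches concurrently instead of one after another
    results = await asyncio.gather(*(subscene.search(t) for t in titles))
    return dict(zip(titles, results))


if __name__ == "__main__":
    found = asyncio.run(search_many(["true detective", "chernobyl"]))
    for title, matches in found.items():
        print(title, "->", len(matches), "results")
```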
--------------------------------------------------------------------------------
/app/subscene.py:
--------------------------------------------------------------------------------
import re
from urllib.parse import quote_plus

from bs4 import BeautifulSoup

from .base import Base


class Subscene(Base):
    async def search(self, title):
        """
        Search subscene for a given title.
        Results are returned as a list of dicts, e.g.:
        {"name": "movie/series title", "link": "link to that title", "count": subtitle count}
        """
        try:
            # encode the query before building the url (the original replaced
            # spaces only after the request had already been sent)
            url = f"https://subscene.com/subtitles/searchbytitle?query={quote_plus(title)}"
            resp = await self.aiorequest(url=url)  # send the request to subscene
            soup = BeautifulSoup(resp, 'lxml')
            find_ul = soup.find('div', class_='search-result').find_all('ul')

            subtitles = []  # every found title goes here
            for ul in find_ul:
                for li in ul.find_all('li'):
                    base_li = li.find('div', class_='title').a
                    name = base_li.text  # movie/show name
                    link = "https://subscene.com" + base_li['href']  # movie/show url
                    try:  # movie/show subtitle count
                        sub_count = li.find('div', class_='subtle count').text
                    except AttributeError:
                        sub_count = li.find('span', class_='subtle count').text
                    sub_count = re.findall(r'\d+', sub_count)[0]

                    subtitles.append({"name": name, "link": link, "count": int(sub_count)})

            return subtitles
        except Exception as e:
            print(e)
            return []

    async def subtitles(self, url, lang=None):
        try:
            resp = await self.aiorequest(url, lang)
            soup = BeautifulSoup(resp, 'lxml')
            title = (soup.find('div', class_='box clearfix')
                         .find('div', class_='top left')
                         .find('div', class_='header').h2.text)
            # strip the "Flag"/"Imdb" labels that leak into the header text
            title = title.replace("Flag", "").replace("Imdb", "").strip()

            subtitles = []
            for tr in soup.table.tbody.find_all('tr'):
                try:
                    sub_name = tr.find('td', class_='a1').a.find_all('span')[1].text.strip()  # release title
                except AttributeError:
                    continue
                sub_link = "https://subscene.com" + tr.find('td', class_='a1').a['href']  # release link
                try:
                    sub_owner = tr.find('td', class_='a5').a.text.strip()  # uploader
                except AttributeError:
                    sub_owner = "Anonymous"
                try:
                    comments = tr.find('td', class_='a6').text.strip()
                except AttributeError:
                    comments = ""

                subtitles.append({"name": sub_name, "link": sub_link,
                                  "owner": sub_owner, "comments": comments})

            return {"title": title, "subtitles": subtitles}
        except Exception as e:
            print(e)
            return {}  # empty dict, so the failure value matches the success type

    async def down_page(self, url):
        resp = await self.aiorequest(url)
        soup = BeautifulSoup(resp, 'lxml')

        maindiv = soup.body.find('div', class_='subtitle').find('div', class_='top left')
        header = maindiv.find('div', class_='header')
        title = header.h1.span.text.strip()
        try:
            imdb = header.h1.a['href']
        except TypeError:
            imdb = ""
        try:
            poster = maindiv.find('div', class_='poster').a['href']
        except AttributeError:
            poster = ""
        try:
            author = header.ul.find('li', class_='author').a
            author_name = author.text.strip()
            author_link = f"https://subscene.com{author['href']}"
        except AttributeError:
            author_name = "Anonymous"
            author_link = ""

        download_url = f"https://subscene.com{header.ul.find('li', class_='clearfix').find('div', class_='download').a['href']}"

        try:
            comments = header.ul.find('li', class_='comment-wrapper').find('div', class_='comment').text
        except AttributeError:
            comments = ""
        try:
            release_divs = header.ul.find('li', class_='release').find_all('div')
            releases = ""
            for div in release_divs[:2]:  # the first two divs hold the release names
                releases += f"\n{div.text.strip()}"
        except AttributeError:
            releases = ""

        return {"title": title, "imdb": imdb, "poster": poster, "author_name": author_name,
                "author_url": author_link, "download_url": download_url, "comments": comments,
                "releases": releases}

    async def download(self, url, file_path):
        # delegate to Base.download_file, which saves the zip under downloaded/
        return await self.download_file(url, file_path)
--------------------------------------------------------------------------------
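As a final note on the scraping itself: `Subscene.search` walks `div.search-result → ul → li`, reads the anchor inside `div.title`, and pulls the digits out of the `subtle count` element. A self-contained sketch of that logic run against a made-up HTML snippet (not a real Subscene response; the real page may differ):

```python
import re

from bs4 import BeautifulSoup

# made-up sample shaped like the subscene search-result markup
SAMPLE_HTML = """
<div class="search-result">
  <ul>
    <li>
      <div class="title"><a href="/subtitles/true-detective-third-season">True Detective - Third Season</a></div>
      <span class="subtle count">263 subtitles</span>
    </li>
  </ul>
</div>
"""

soup = BeautifulSoup(SAMPLE_HTML, "lxml")
for li in soup.find("div", class_="search-result").find_all("li"):
    anchor = li.find("div", class_="title").a
    count = re.findall(r"\d+", li.find("span", class_="subtle count").text)[0]
    print(anchor.text, "https://subscene.com" + anchor["href"], int(count))
```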