├── .gitignore ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── audiobook ├── data.csv ├── main.py ├── processed.json ├── requirements.txt ├── search ├── search.py ├── searchyt.py └── utils.py ├── setup.py ├── youtube.py └── ytbooklist.csv /.gitignore: -------------------------------------------------------------------------------- 1 | /.DS_Store 2 | /venv 3 | /__pycache__ 4 | /search/__pycache__ 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Priyam Srivastava 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | thefuzz = "*" 8 | requests = "*" 9 | bs4 = "*" 10 | pytube = "*" 11 | 12 | [dev-packages] 13 | 14 | [requires] 15 | python_version = "3.9" 16 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "518a11425804f42caf8fa56fb5bcc4e201ec98b7b41a00ff44e8d5b02baea26c" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.9" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "beautifulsoup4": { 20 | "hashes": [ 21 | "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf", 22 | "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891" 23 | ], 24 | "markers": "python_version >= '3.1'", 25 | "version": "==4.10.0" 26 | }, 27 | "bs4": { 28 | "hashes": [ 29 | "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" 30 | ], 31 | "index": "pypi", 32 | "version": "==0.0.1" 33 | }, 34 | "certifi": { 35 | "hashes": [ 36 | "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", 37 | "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" 38 | ], 39 | "version": "==2021.10.8" 40 | }, 41 | "charset-normalizer": { 42 | "hashes": [ 43 | "sha256:2857e29ff0d34db842cd7ca3230549d1a697f96ee6d3fb071cfa6c7393832597", 44 | "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df" 45 | ], 46 | "markers": "python_version >= '3'", 47 | "version": "==2.0.12" 48 | }, 49 | "idna": { 50 | "hashes": [ 51 | "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", 52 | "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" 53 | ], 54 | "markers": "python_version >= '3'", 55 | "version": "==3.3" 56 | }, 57 | "pytube": { 58 | "hashes": [ 59 | "sha256:422a9c2e1d35521f00c692b59c18edbaf2c1e12197bd36745bf00a26875674db", 60 | "sha256:e6cb158ab16f747eae490a5f49233ddb1f111a425c014fb8a9cda7fcb806a689" 61 | ], 62 | "index": "pypi", 63 | "version": "==12.0.0" 64 | }, 65 | "requests": { 66 | "hashes": [ 67 | "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61", 68 | "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d" 69 | ], 70 | "index": "pypi", 71 | "version": "==2.27.1" 72 | }, 73 | "soupsieve": { 74 | "hashes": [ 75 | "sha256:1a3cca2617c6b38c0343ed661b1fa5de5637f257d4fe22bd9f1338010a1efefb", 76 | "sha256:b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9" 77 | ], 78 | "markers": "python_version >= '3.6'", 79 | "version": "==2.3.1" 80 | }, 81 | "thefuzz": { 82 | "hashes": [ 83 | "sha256:4fcdde8e40f5ca5e8106bc7665181f9598a9c8b18b0a4d38c41a095ba6788972", 84 | "sha256:6f7126db2f2c8a54212b05e3a740e45f4291c497d75d20751728f635bb74aa3d" 85 | ], 86 | "index": "pypi", 87 | "version": "==0.19.0" 88 | }, 89 | "urllib3": { 90 | "hashes": [ 91 | "sha256:000ca7f471a233c2251c6c7023ee85305721bfdf18621ebff4fd17a8653427ed", 92 | "sha256:0e7c33d9a63e7ddfcb86780aac87befc2fbddf46c58dbb487e0855f7ceec283c" 93 | ], 94 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 95 | "version": "==1.26.8" 96 | } 97 | }, 98 | "develop": {} 99 | } 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Audiobook Downloader 2 | 3 | 4 | 5 | https://user-images.githubusercontent.com/79325116/154844191-2dd83525-8717-4d93-b60f-bb626881edc9.mp4 6 | 7 | 8 | 9 | 10 | 11 | Downloading and searching for audiobooks super-easily 12 | *--Might be buggy--* 13 | 14 | Please suggest any changes by opening issues 15 | Gonna add torrent option soon 16 | # Install 17 | > Install git and python 18 | 19 | `git clone https://github.com/ipriyam26/Audiobook.git && cd Audiobook` 20 | 21 | `python setup.py` 22 | 23 | # Use 24 | > python main.py 25 | 26 | -------------------------------------------------------------------------------- /audiobook: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | /Users/ipriyam26/Programing/PycharmProjects/Audiobook/venv/bin/python /Users/ipriyam26/Programing/PycharmProjects/Audiobook/main.py 3 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import search.search as search 2 | import search.searchyt as searchyt 3 | import youtube 4 | import sys 5 | 6 | try: 7 | args = sys.argv[1] 8 | if args in ['-yt', '-o']: 9 | searchyt.SearchYT(flag=args) 10 | elif args == '-u': 11 | youtube.UploadYoutube() 12 | else: 13 | print("No such flag") 14 | 15 | 16 | except Exception: 17 | search.IndexSearch() 18 | 19 | 20 | -------------------------------------------------------------------------------- /processed.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ipriyam26/Audiobook/072fc5f2fef5e7f02052f55461781809997269fa/processed.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # These requirements were autogenerated by pipenv 3 | # To regenerate from the project's Pipfile, run: 4 | # 5 | # pipenv lock --requirements 6 | # 7 | 8 | -i https://pypi.org/simple 9 | beautifulsoup4==4.10.0; python_version >= '3.1' 10 | bs4==0.0.1 11 | certifi==2021.10.8 12 | charset-normalizer==2.0.12; python_version >= '3' 13 | idna==3.3; python_version >= '3' 14 | pytube==12.0.0 15 | requests==2.27.1 16 | soupsieve==2.3.1; python_version >= '3.6' 17 | thefuzz==0.19.0 18 | urllib3==1.26.8; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4' 19 | -------------------------------------------------------------------------------- /search/search.py: -------------------------------------------------------------------------------- 1 | from thefuzz import process, fuzz 2 | import search.utils as utils 3 | import threading 4 | import requests 5 | import os 6 | import re 7 | from bs4 import BeautifulSoup 8 | import search.searchyt as YT 9 | 10 | class IndexSearch: 11 | 12 | def __init__(self) -> None: 13 | self.search = input("Enter book name: ") 14 | self.full_length_audiobook() 15 | 16 | def get_link(self): 17 | titles = [] 18 | titles_and_paths = utils.get_dictonary('/Users/ipriyam26/Programing/PycharmProjects/Audiobook/data.csv') 19 | titles = list(titles_and_paths.values()) 20 | titles.pop(0) 21 | matches = list(process.extract(self.search, titles, limit=10, scorer=fuzz.token_set_ratio)) 22 | 23 | if matches.__len__() == 0: 24 | print("No match found") 25 | return "No match found" 26 | for i, match in enumerate(matches, start=1): 27 | print(f"[{i}] {match[0]}") 28 | n = int(input("Please Pick one to download \nor 0 to search other index: ")) - 1 29 | 30 | while n > 11 or n < -1: 31 | n = int(input("Please Pick one to download: ")) - 1 32 | return "No match found" if n < 0 else utils.get_key(matches[n][0], titles_and_paths) 33 | 34 | 35 | def download(self,filename,downLink,path): 36 | p = os.path.join(path,filename) 37 | print(f"Downloading.... {filename}") 38 | doc = requests.get(downLink) 39 | with open(p, 'wb') as f: 40 | f.write(doc.content) 41 | print("Downloaded.... ",filename) 42 | 43 | def full_length_audiobook(self): 44 | link = self.get_link() 45 | if(link=="No match found"): 46 | YT.SearchYT(search=self.search,flag='-yt') 47 | else: 48 | r = requests.get(link) 49 | soup = BeautifulSoup(r.content, 'html.parser') 50 | bookName = link.split('/')[3] 51 | parent_path = os.getcwd() 52 | path = os.path.join(parent_path,bookName) 53 | os.makedirs(path) 54 | for a in soup.find_all('a', href=re.compile(r'http.*\.mp3')): 55 | filename = a['href'][a['href'].rfind("/")+1:] 56 | downLink = a['href'] 57 | threading.Thread(target=self.download,args=(filename,downLink,path)).start() 58 | 59 | 60 | # print("Downloaded all the files") 61 | # os.system(f"gupload {bookName}") 62 | -------------------------------------------------------------------------------- /search/searchyt.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | from re import I 5 | from thefuzz import process, fuzz 6 | import search.utils as utils 7 | from pytube import YouTube,Search 8 | import time 9 | 10 | from youtube import UploadYoutube 11 | 12 | class SearchYT: 13 | 14 | def __init__(self, search, flag) -> None: 15 | self.search = search 16 | if flag == '-yt' or flag is None: 17 | self.youtube() 18 | else: 19 | self.yt_search() 20 | 21 | def download(self, link): 22 | yt = YouTube(link) 23 | try: 24 | video = yt.streams.filter(only_audio=True).first() 25 | out_file = video.download(output_path='.') 26 | base, ext = os.path.splitext(out_file) 27 | new_file = f'{base}.mp3' 28 | os.rename(out_file, new_file) 29 | print(f"{yt.title} has been successfully downloaded.") 30 | except Exception: 31 | print("Unsuccessful") 32 | 33 | # result of success 34 | 35 | 36 | def get_link(self): 37 | titles_and_paths = utils.get_dictonary('/Users/ipriyam26/Programing/PycharmProjects/Audiobook/ytbooklist.csv') 38 | titles = list(titles_and_paths.values()) 39 | matches = list(process.extract(self.search, titles, limit=10, scorer=fuzz.token_set_ratio)) 40 | 41 | i = 1 42 | if matches.__len__() == 0 or matches[0][1] < 75: 43 | print("No match found") 44 | return "No match found" 45 | if matches[0][1] == 100: 46 | for match in matches: 47 | if match[1] < 90: 48 | break 49 | print(f"[{i}] {match[0]}") 50 | i += 1 51 | else: 52 | for match in matches: 53 | if match[1] < 75: 54 | break 55 | print(f"[{i}] {match[0]}") 56 | i += 1 57 | n = int(input("Please Pick one to download Enter 0 for other index")) - 1 58 | while n < 0 or n > i: 59 | n = int(input("Please Pick one to download: ")) - 1 60 | if n == 0: 61 | self.yt_search() 62 | else: 63 | return utils.get_key(matches[n][0], titles_and_paths) 64 | 65 | 66 | def youtube(self): 67 | print("Trying to locate in youtube....") 68 | link = self.get_link() 69 | if(link == "No match found"): 70 | self.yt_search() 71 | else: 72 | self.download(link) 73 | 74 | 75 | 76 | 77 | 78 | def yt_search(self): 79 | print("Looking online...") 80 | yt = Search(f'{self.search}*full audiobook*') 81 | len(yt.results) 82 | results = [result for result in yt.results if result.length > 3600] 83 | title_length = {video.title: video.length for video in results} 84 | link = [video.watch_url for video in results] 85 | i = 1 86 | for title in title_length: 87 | length = time.strftime("%Hh %Mm", time.gmtime(int(title_length[title]))) 88 | print(f"[{i}] {title} - {length}") 89 | i += 1 90 | n = int(input(f"Enter Selection: [1-{i}] Enter 0 to quit: ")) 91 | while n > i or n < 0: 92 | n = int(input(f"Please enter a number between [1-{i}] Enter Selection again: ")) 93 | 94 | if n == 0: 95 | return 96 | n -= 1 97 | self.download(link[n]) 98 | UploadYoutube(link=link[n]) 99 | -------------------------------------------------------------------------------- /search/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | def get_dictonary(file): 4 | titles_and_paths = {} 5 | with open(file,'r',encoding='utf-8') as f: 6 | data = csv.reader(f) 7 | next(data) 8 | for row in data: 9 | if(row[0] == " "): 10 | continue 11 | titles_and_paths[row[1]] = row[0].strip() 12 | return titles_and_paths 13 | 14 | def get_key(val,dict): 15 | for key, value in dict.items(): 16 | if val == value: 17 | return key 18 | 19 | return "key doesn't exist" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.system('pip install -r requirements.txt') -------------------------------------------------------------------------------- /youtube.py: -------------------------------------------------------------------------------- 1 | 2 | # coding=utf-8 3 | 4 | from pytube import YouTube,Channel,Playlist 5 | import re 6 | import csv 7 | 8 | class UploadYoutube: 9 | def __init__(self,link) -> None: 10 | link = input("Please input link: ") if link == None else link 11 | self.source_processor(link) 12 | 13 | def clean(self,text): 14 | text = text.replace("Greatest🌟AudioBooks","").replace("🎧📖","").replace("|","").replace("-","").replace("AudioBook","").strip() 15 | regrex_pattern = re.compile(pattern = "[" 16 | u"\U0001F600-\U0001F64F" # emoticons 17 | u"\U0001F300-\U0001F5FF" # symbols & pictographs 18 | u"\U0001F680-\U0001F6FF" # transport & map symbols 19 | u"\U0001F1E0-\U0001F1FF" # flags (iOS) 20 | "]+", flags = re.UNICODE) 21 | title= regrex_pattern.sub(r'',text) 22 | return title 23 | 24 | def add_all(self,f,object): 25 | i,j=0,0 26 | writer = csv.writer(f) 27 | for video in object.videos: 28 | print(f"seen {j}") 29 | j+=1 30 | if(video.length < 3000): 31 | continue 32 | title = video.title.replace("/","").strip() 33 | writer.writerow([title, video.watch_url]) 34 | print(f"{title} added to the list {i}") 35 | i+=1 36 | 37 | def add_video(self,f,video_link): 38 | video = YouTube(video_link) 39 | writer = csv.writer(f) 40 | title = video.title.replace("/","").strip() 41 | writer.writerow([title,video.watch_url]) 42 | print(f"{title} added to the list") 43 | 44 | 45 | def source_processor(self,source): 46 | with open('ytbooklist.csv', 'a') as f: 47 | selector = source.replace("https://www.youtube.com/","")[0] 48 | if "p" == selector: 49 | obj = Playlist(source) 50 | self.add_all(f,obj) 51 | elif "c" == selector: 52 | obj = Channel(source) 53 | self.add_all(f,obj) 54 | else: 55 | self.add_video(f,source) 56 | 57 | 58 | 59 | --------------------------------------------------------------------------------