├── .idea ├── .gitignore ├── Anime_Scraper.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── .ipynb_checkpoints └── Untitled-checkpoint.ipynb ├── Procfile ├── README.md ├── Untitled.ipynb ├── app.py ├── extractor └── __init__.py ├── geckodriver.log ├── model ├── AnimeEpisode.py ├── AnimeSearch.py ├── anime.py └── model.py ├── requirements.txt ├── sites ├── ChiaAnime.py ├── __init__.py ├── anime1.py ├── gogoanime.py └── nine_anime.py └── test.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml 3 | -------------------------------------------------------------------------------- /.idea/Anime_Scraper.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import json\n", 11 | "import os\n", 12 | "from bs4 import BeautifulSoup\n", 13 | "ANIME_LIST_URL=\"https://animeflix.io/shows/null-peta/episode-7-538426/sub\"" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 3, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/plain": [ 24 | "" 25 | ] 26 | }, 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "output_type": "execute_result" 30 | } 31 | ], 32 | "source": [ 33 | "page = requests.get(ANIME_LIST_URL)\n", 34 | "page." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [] 43 | } 44 | ], 45 | "metadata": { 46 | "kernelspec": { 47 | "display_name": "Python 3", 48 | "language": "python", 49 | "name": "python3" 50 | }, 51 | "language_info": { 52 | "codemirror_mode": { 53 | "name": "ipython", 54 | "version": 3 55 | }, 56 | "file_extension": ".py", 57 | "mimetype": "text/x-python", 58 | "name": "python", 59 | "nbconvert_exporter": "python", 60 | "pygments_lexer": "ipython3", 61 | "version": "3.7.3" 62 | } 63 | }, 64 | "nbformat": 4, 65 | "nbformat_minor": 2 66 | } 67 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn app:app 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Anime Scraper:- 2 | 3 | ![website](https://img.shields.io/badge/website-up-green) ![Maintenance](https://img.shields.io/badge/Maintained%3F-No-red.svg) ![Build Status](https://travis-ci.org/joemccann/dillinger.svg?branch=master) 4 | ![GitHub last commit](https://img.shields.io/github/last-commit/rawkush/Anime_Scraper?style=plastic) 5 | ![GitHub Release Date](https://img.shields.io/github/release-date/rawkush/Anime_Scraper?style=plastic) ![Lines of code](https://img.shields.io/tokei/lines/github/rawkush/Anime_Scraper?style=plastic) 6 | ![GitHub issues](https://img.shields.io/github/issues/rawkush/Anime_Scraper?style=plastic) ![GitHub closed issues](https://img.shields.io/github/issues-closed/rawkush/Anime_Scraper?style=plastic) ![GitHub forks](https://img.shields.io/github/forks/rawkush/Anime_Scraper?style=social) 7 | ![GitHub Repo stars](https://img.shields.io/github/stars/rawkush/Anime_Scraper?style=social) 8 | ![GitHub watchers](https://img.shields.io/github/watchers/rawkush/Anime_Scraper?style=social)
![GitHub all releases](https://img.shields.io/github/downloads/rawkush/Anime_Scraper/total?style=plastic)
9 | 10 | ![GitHub](https://img.shields.io/github/license/rawkush/Anime_Scraper?style=plastic) 11 | ![ViewCount](https://views.whatilearened.today/views/github/rawkush/anime_scraper.svg) 12 | 13 | Anime Scraper is a library that provides an unofficial API to scrape different anime streaming platforms such as gogoanime and 9anime, and returns the data as JSON objects. This repository is split into two pieces: one contains the Java scraper and the other contains the Python scraper. 14 |
This project was created for the purpose of using it in RawAnime.
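As a quick illustration, here is a minimal client sketch (not part of the repository) for the hosted Python API described under "For Python API" below. The endpoint URL and the `intent` payload come from this README, the response keys follow what `app.py` returns, and whether the Heroku app is still reachable is an assumption.

```
import requests

API_URL = 'https://rawanime.herokuapp.com/'  # hosted endpoint from this README

# Ask for the recently updated anime; other intents ("search", "getAnime",
# "episode") are documented in the "For Python API" section below.
response = requests.post(API_URL, json={"intent": "new"})
response.raise_for_status()

# app.py wraps every result in a top-level "data" key.
for entry in response.json()["data"]:
    print(entry["title"], entry["url"])
```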
15 | If you are using our project, feel free to give us credit :) 16 | 17 | 18 | #### The Java library can be used by downloading and importing it into your application; however, the Python API is the only one that is maintained and supports multiple servers 19 | 20 | 21 | # How to Use the Java lib 22 | - Import this library into your project 23 | 24 | #### To get the list of recently updated animes 25 | 26 | 27 | ``` 28 | // initialize the variable 29 | Scraper animes = new Scraper(); 30 | animes.getRecentlyUpdated(); // returns JSON Object 31 | ``` 32 | 33 | #### Popular Animes 34 | ``` 35 | animes.getPopularOngoing(); // returns JSON Object 36 | ``` 37 | 38 | #### Search Animes 39 | ``` 40 | animes.searchAnime(animeName); // animeName is a String 41 | ``` 42 | 43 | ### Episodes 44 | 45 | ``` 46 | animes.getAllEpisodes(animeUrl); // animeUrl is a String 47 | ``` 48 | 49 | ### Video links from different servers 50 | 51 | ``` 52 | animes.getServers(url); // url is a String 53 | ``` 54 | 55 | ##### For a complete code example, check out the demo.java file in the repository 56 | 57 | ## Screenshot of JSON 58 | ![JSON response screenshot](https://user-images.githubusercontent.com/25636146/48275506-a83cf180-e46b-11e8-9263-52fcba01b560.png) 59 | 60 | ## Libraries Used 61 | - Jsoup 62 | - java-JSON 63 | 64 | # For Python API 65 | 66 | Make a POST request to the given URL; the data posted should be JSON. 67 | 68 | ```url = 'https://rawanime.herokuapp.com/' ``` 69 | The JSON must contain an `intent` key, which specifies which API to call. Examples of the JSON are given below. 70 | 71 | ### For recently updated anime 72 | ``` 73 | { 74 | "intent":"new" 75 | } 76 | ``` 77 | ### For searching anime 78 | ``` 79 | { 80 | "intent":"search", 81 | "anime": "naruto shippuden" 82 | } 83 | 84 | ``` 85 | 86 | ### Get details of specific anime 87 | 88 | ``` 89 | { 90 | "intent":"getAnime", 91 | "url":"http://gogoanime.com/naruto" 92 | } 93 | 94 | ``` 95 | 96 | ### Get video link 97 | ``` 98 | { 99 | "intent":"episode", 100 | "url": "http://www.gogoanime.com/naruto/", 101 | "episode_number":"13" 102 | } 103 | 104 | ``` 105 | # Note: 106 | 107 | The above keys are required for the API to work. All requests are handled by the same base URL; for different functionality, only the posted data changes, as shown above. 108 | 109 | 110 | # Projects Using GogoScraper 111 | - [RawAnime](https://github.com/Rawkush/RawAnime) 112 | 113 | # Contribution 114 | 115 | Want to contribute? Great! 116 | 117 | Fork your own copy of this repository; make changes, fix bugs, add additional features, or just prettify the code; and create a pull request explaining what you have added, fixed, or improved so that we can merge it into our branch. 118 | 119 | 120 | 121 | 122 | This library is being developed to be used in the [RawAnime](https://github.com/Rawkush/RawAnime) Android application, but feel free to use it in your project. 123 | 124 | This library is created for educational purposes only, and we shall not be responsible for its misuse. 125 | 126 | #### If you are using our library, please do tell us so we can add your project link. 127 | 128 | 129 | ## TODO 130 | - Getting direct links to videos 131 | - Adding more anime streaming websites 132 | - Download or stream any episode or episode range of any anime. 133 | - Specify the quality you want to stream or download. 134 | 135 | ### Supported Websites 136 | 137 | - 9anime.to 138 | - kissanime.ru 139 | - gogoanime (currently this is the only one supported) 140 | - horriblesubs
141 | - animekisa 142 | - anistream 143 | - wonderfulsub 144 | - chia anime 145 | - ryuanime 146 | - BestAnimes 147 | - animeultima 148 | - animepahe 149 | - KissCartoon 150 | - twist.moe 151 | - animeflix (supported as it has an API) 152 | 153 | 154 | # IMPORTANT NOTE 155 | This API was created without the consent of the owners of the websites. The main purpose of this project was for me to learn web scraping, so if you want to use our API, we will not be held responsible for any legal action taken against you by the owners. 156 | 157 | ## Arigatou Gozaimasu 158 | -------------------------------------------------------------------------------- /Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import requests\n", 10 | "import json\n", 11 | "import os\n", 12 | "from bs4 import BeautifulSoup\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import flask\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [] 30 | } 31 | ], 32 | "metadata": { 33 | "kernelspec": { 34 | "display_name": "Python 3", 35 | "language": "python", 36 | "name": "python3" 37 | }, 38 | "language_info": { 39 | "codemirror_mode": { 40 | "name": "ipython", 41 | "version": 3 42 | }, 43 | "file_extension": ".py", 44 | "mimetype": "text/x-python", 45 | "name": "python", 46 | "nbconvert_exporter": "python", 47 | "pygments_lexer": "ipython3", 48 | "version": "3.7.3" 49 | } 50 | }, 51 | "nbformat": 4, 52 | "nbformat_minor": 2 53 | } 54 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | # importing libraries 2 | from flask import jsonify, request,Flask,json 3 | import requests 4 | from bs4 import BeautifulSoup 5 | BASE_URL = "https://www.gogoanime.io/"  # gogoanime base URL (same value as sites/gogoanime.py) 6 | # scrape the recently updated episode list from the gogoanime home page 7 | def getNew(): 8 | page = requests.get(BASE_URL) 9 | soup = BeautifulSoup(page.content, "html.parser") 10 | table = soup.find('div', attrs = {'class':'last_episodes loaddub'}).find_all('li') 11 | data=[ {'url':r.div.a['href'],'title':r.div.a['title'],'img':r.div.img['src'], 'episode':r.find('p','episode').text} for r in table] 12 | data={"data":data} 13 | return json.dumps(data) 14 | 15 | # fetch an anime's detail ("category") page and return its metadata and episode count 16 | def getAnime(url): 17 | URL=BASE_URL+"category" + url.split("-episode")[0] 18 | page = requests.get(URL) 19 | soup = BeautifulSoup(page.content, "html.parser") 20 | ep=soup.find('ul',attrs={'id':'episode_page'}) 21 | url=URL.split('category/') 22 | url=url[0]+url[1] 23 | table = soup.find('div', attrs = {'class':'anime_info_body'}) 24 | data={'img':table.img['src'],'title':table.h1.text,'base_url':url,'episodes':ep.a['ep_end'], 'other':[{x.span.text:x.text} for x in table.find_all('p',attrs={'class':'type'})] } 25 | data={"data":data} 26 | return json.dumps(data) 27 | 28 | # build the episode page URL and return its streaming servers and download link 29 | def getEpisode(url, ep_num): 30 | url = url + "-episode-" + ep_num 31 | page = requests.get(url) 32 | soup = BeautifulSoup(page.content, "html.parser") 33 | ep=soup.find('div',attrs={'class':'anime_muti_link'}).find_all('li') 34 | down=soup.find('div',attrs={'class':'anime_video_body_cate'}).find_all('a') 35 | data={'stream':[{l['class'][0]:l.a['data-video']} for l in ep ], "download":down[-1]['href']} 36 | data={"data":data} 37 | 38 | return json.dumps(data) 39 | 40 | # search gogoanime by keyword and return the matching anime
41 | def search(anime): 42 | URL=BASE_URL+'search.html?keyword=' 43 | anime= URL+anime 44 | page = requests.get(anime) 45 | soup = BeautifulSoup(page.content, "html.parser") 46 | table = soup.find('div', attrs = {'class':'last_episodes'}).find_all('li') 47 | data=[ {'title':x.find('p','name').a['title'], 'img':x.div.a.img['src'],'url':x.div.a['href'],'released': x.find('p','released').text.strip()} for x in table] 48 | data={"data":data} 49 | 50 | return json.dumps(data) 51 | 52 | 53 | # app 54 | app = Flask(__name__) 55 | 56 | @app.route('/', methods=['GET', 'POST']) 57 | def predict(): 58 | # single endpoint: dispatch on the "intent" key of the posted JSON 59 | # get data 60 | req = request.get_json(force=True) 61 | if req["intent"]== 'new': 62 | return getNew() 63 | # return data 64 | elif req["intent"]=="getAnime": 65 | return getAnime(req["url"]) 66 | 67 | elif req["intent"]=="episode": 68 | return getEpisode(req["url"],req["episode_number"]) 69 | elif req['intent']=='search': 70 | return search(req['anime']) 71 | 72 | return getNew() 73 | 74 | if __name__ == '__main__': 75 | app.run() 76 | -------------------------------------------------------------------------------- /extractor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/extractor/__init__.py -------------------------------------------------------------------------------- /geckodriver.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/geckodriver.log -------------------------------------------------------------------------------- /model/AnimeEpisode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/model/AnimeEpisode.py -------------------------------------------------------------------------------- /model/AnimeSearch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/model/AnimeSearch.py -------------------------------------------------------------------------------- /model/anime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/model/anime.py -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/model/model.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | Flask==1.0.2 3 | Flask-RESTful==0.3.6 4 | gunicorn==19.9.0 5 | beautifulsoup4 6 | requests 7 | -------------------------------------------------------------------------------- /sites/ChiaAnime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/sites/ChiaAnime.py -------------------------------------------------------------------------------- /sites/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/sites/__init__.py -------------------------------------------------------------------------------- /sites/anime1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/sites/anime1.py -------------------------------------------------------------------------------- /sites/gogoanime.py: -------------------------------------------------------------------------------- 1 | 2 | import requests 3 | from bs4 import BeautifulSoup 4 | 5 | class Gogoanime: 6 | BASE_URL="https://www.gogoanime.io/" 7 | -------------------------------------------------------------------------------- /sites/nine_anime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/sites/nine_anime.py -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rawkush/Anime_Scraper/9283498ff206eee700fd83fd2a2d82fcad4c17f1/test.py --------------------------------------------------------------------------------
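sites/gogoanime.py above is only a stub. As a rough sketch of how that class could grow, the snippet below moves the keyword-search scraping that already lives in app.py behind the `Gogoanime` class; the CSS selectors are the same ones app.py uses, while the `search` method itself is hypothetical and does not exist in the repository.

```
import requests
from bs4 import BeautifulSoup


class Gogoanime:
    BASE_URL = "https://www.gogoanime.io/"

    # Hypothetical method: same selectors as search() in app.py, just wrapped
    # in the class so other sites in sites/ could expose the same interface.
    def search(self, anime):
        page = requests.get(self.BASE_URL + "search.html", params={"keyword": anime})
        soup = BeautifulSoup(page.content, "html.parser")
        items = soup.find("div", attrs={"class": "last_episodes"}).find_all("li")
        return [
            {
                "title": x.find("p", "name").a["title"],
                "img": x.div.a.img["src"],
                "url": x.div.a["href"],
                "released": x.find("p", "released").text.strip(),
            }
            for x in items
        ]
```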