├── test ├── __init__.py └── test_instagramy.py ├── MANIFEST.in ├── requirements.txt ├── docs ├── _config.yml └── index.md ├── instagramy ├── core │ ├── __init__.py │ ├── user_agent.py │ ├── requests.py │ ├── exceptions.py │ ├── cache.py │ └── parser.py ├── plugins │ ├── __init__.py │ ├── analysis.py │ ├── download.py │ └── manual_loading.py ├── __init__.py ├── __main__.py ├── InstagramHashTag.py ├── InstagramUser.py ├── InstagramLocation.py └── InstagramPost.py ├── samples ├── post.png ├── user.png ├── hashtag.png └── sessionid.gif ├── setup.cfg ├── .gitignore ├── setup.py ├── .github └── workflows │ ├── python-publish.yml │ └── python-package.yml ├── LICENSE.txt ├── CODE_OF_CONDUCT.md └── README.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # No requirements 2 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /instagramy/core/__init__.py: -------------------------------------------------------------------------------- 1 | # File for to make it as a Python Package 2 | -------------------------------------------------------------------------------- /instagramy/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | """ Various Plugins for instagramy """ 2 | -------------------------------------------------------------------------------- 
/samples/post.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yogeshwaran01/instagramy/HEAD/samples/post.png -------------------------------------------------------------------------------- /samples/user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yogeshwaran01/instagramy/HEAD/samples/user.png -------------------------------------------------------------------------------- /samples/hashtag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yogeshwaran01/instagramy/HEAD/samples/hashtag.png -------------------------------------------------------------------------------- /samples/sessionid.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yogeshwaran01/instagramy/HEAD/samples/sessionid.gif -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git, 3 | .vscode, 4 | docs, 5 | venv, 6 | .gitignore, 7 | README.md, 8 | test 9 | 10 | max-line-length = 119 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | dist 3 | build 4 | instagramy.egg-info 5 | .vscode 6 | instagramy/__pycache__ 7 | test/__pycache__ 8 | core/__pycache__ 9 | instagramy/core/__pycache__ 10 | .instagramy_cache 11 | instagramy/plugins/__pycache__ 12 | .old 13 | instagramy/plugins/__pycache__ 14 | test.json 15 | -------------------------------------------------------------------------------- /instagramy/core/user_agent.py: -------------------------------------------------------------------------------- 1 | """ User Agents for webscraping """ 2 
def get(url: str, sessionid=None) -> str:
    """
    Send a GET request and return the response body as text.

    A User-Agent is picked at random from `user_agents` on every call.

    :param url: address to fetch
    :param sessionid (optional): value sent as the `sessionid` cookie, which
        is how the package logs in to Instagram
    :return: response body decoded as UTF-8
    :raises urllib.error.HTTPError: not handled here; errors from `urlopen`
        propagate to the caller
    """

    # The header value is the bare agent string. It used to be sent as
    # "user-agent: <agent>", i.e. with the header name repeated in the value.
    headers = {"User-Agent": random.choice(user_agents)}
    request = Request(url=url, headers=headers)
    if sessionid:
        request.add_header("Cookie", f"sessionid={sessionid}")
    with urlopen(request) as response:
        body = response.read()

    return body.decode("utf-8")
7 | 8 | :license: MIT License 9 | """ 10 | 11 | __package__ = "instagramy" 12 | __description__ = "Python Package for Instagram User, Posts, Hashtags and Locations data" 13 | __url__ = "https://github.com/yogeshwaran01/instagramy" 14 | __version__ = "4.5" 15 | __author__ = "YOGESHWARAN R " 16 | __license__ = "MIT License" 17 | __copyright__ = "Copyright 2021 Yogeshwaran R" 18 | 19 | __all__ = ["InstagramUser", "InstagramHashTag", "InstagramPost", "InstagramLocation"] 20 | 21 | from .InstagramUser import InstagramUser 22 | from .InstagramPost import InstagramPost 23 | from .InstagramHashTag import InstagramHashTag 24 | from .InstagramLocation import InstagramLocation 25 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="instagramy", 8 | version="4.5", 9 | license='MIT', 10 | author="Yogeshwaran R", 11 | author_email="yogeshin247@gmail.com", 12 | description="Python Package for Instagram User, Posts, Hashtags and Locations data", 13 | long_description=long_description, 14 | long_description_content_type="text/markdown", 15 | url="https://github.com/yogeshwaran01/instagramy/", 16 | packages=setuptools.find_packages(), 17 | download_url="https://github.com/yogeshwaran01/instagramy/archive/master.zip", 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | "Operating System :: OS Independent", 22 | ] 23 | ) 24 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: 
https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) [2021] [instagramy] 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8, 3.9] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install flake8 30 | - name: Lint with flake8 31 | run: | 32 | # stop the build if there are Python syntax errors or undefined names 33 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 34 | # exit-zero treats all errors as warnings. 
class BaseException(HTTPError):
    """
    Base Exception for instagramy

    :param name: identifier (username, hashtag, post id, location) that the
        failed request was made for
    """

    # NOTE(review): this name shadows the builtin `BaseException` inside this
    # module, and `HTTPError.__init__` is never called, so the usual
    # HTTPError attributes (code, msg, headers) are presumably unset on
    # instances - confirm before relying on them.
    def __init__(self, name: str):
        self.name = name


class UsernameNotFound(BaseException):
    """ Raise if Username not found in Instagram """

    def __str__(self):
        return f"InstagramUser('{self.name}') not Found"


class HashTagNotFound(BaseException):
    """ Raise if Hashtag not found in Instagram """

    def __str__(self):
        # Same wording as UsernameNotFound; names the class as it is spelled.
        return f"InstagramHashTag('{self.name}') not Found"


class PostIdNotFound(BaseException):
    """ Raise if Post id not found in Instagram """

    def __str__(self):
        return f"InstagramPost('{self.name}') not Found"


class LocationNotFound(BaseException):
    """ Raise if Location not found in Instagram """

    def __str__(self):
        return f"InstagramLocation('{self.name}') not Found"


class RedirectionError(Exception):
    """ Raise if Instagram Redirects it to Login Page """

    def __str__(self):
        # Adjacent literals instead of backslash continuations inside a single
        # literal: with the latter, any leading whitespace on the continuation
        # lines becomes part of the message.
        return (
            "Instagram Redirects you to login page, "
            "Try After Sometime or Reboot your PC "
            "Provide the sessionid to Login"
        )
def analyze_users_popularity(usernames: list, sessionid: str) -> dict:
    """ Collect follower, following and post counts for several Instagram users """

    report = {"Usernames": usernames, "Followers": [], "Following": [], "Posts": []}
    for name in usernames:
        profile = InstagramUser(name, sessionid)
        report["Followers"].append(profile.number_of_followers)
        report["Following"].append(profile.number_of_followings)
        report["Posts"].append(profile.number_of_posts)
    return report


def analyze_user_recent_posts(username: str, sessionid: str) -> dict:
    """ Collect url, likes and comments of each post listed for one Instagram user """

    recent = InstagramUser(username, sessionid).posts
    summary = {"Posts": [], "Likes": [], "Comments": []}
    # Single pass over `posts`, one entry per column for every post.
    for entry in recent:
        summary["Posts"].append(entry["post_url"])
        summary["Likes"].append(entry["likes"])
        summary["Comments"].append(entry["comments"])
    return summary


def analyze_hashtags(hashtags: list, sessionid: str) -> dict:
    """ Collect the number of posts for several Instagram Hashtags """

    counts = [InstagramHashTag(name, sessionid).number_of_posts for name in hashtags]
    return {"Hashtag": hashtags, "Posts": counts}
def download_hashtags_posts(tag: str, sessionid: str, count=1):
    """
    Download top posts of a particular Hashtag

    - Creates a directory named after the hashtag (reused if it already exists)
    - Downloads up to `count` top posts, each saved under its post id with
      an `.mp4` extension for videos and `.jpg` otherwise

    :param tag: name of the Instagram Hashtag
    :param sessionid: Session id of Instagram which is in browser cookies
    :param count: number of posts to download, at most 65
    :return: True once every selected post has been downloaded
    :raises ValueError: if `count` is greater than 65
    """

    if count > 65:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("Count must not be greater than 65")

    hashtag = InstagramHashTag(tag, sessionid)
    shortcodes = [post.shortcode for post in hashtag.top_posts][:count]
    # exist_ok: a second run for the same hashtag must not fail on the folder.
    os.makedirs(hashtag.tagname, exist_ok=True)
    for shortcode in shortcodes:
        post = InstagramPost(shortcode, sessionid)
        source = post.post_source
        extension = "mp4" if post.type_of_post == "GraphVideo" else "jpg"
        urllib.request.urlretrieve(
            source, filename=f"{hashtag.tagname}/{shortcode}.{extension}"
        )

    return True
class InstagramUser(UserParser):
    r"""
    Parse the data of User from manual loading

    :param data: user_data from `instagramy.InstagramUser.user_data`
    """

    def __init__(self, data: dict):
        self.user_data = data


class InstagramPost(PostParser):
    r"""
    Parse the data of Post from manual loading

    :param data: post_data from `instagramy.InstagramPost.post_data`
    """

    def __init__(self, data: dict):
        self.post_data = data


class InstagramHashTag(TagParser):
    r"""
    Parse the data of hashtag from manual loading

    :param data: tag_data from `instagramy.InstagramHashTag.tag_data`
    """

    def __init__(self, data: dict):
        self.tag_data = data


class InstargramLocation(LocationParser):
    r"""
    Parse the data of location from manual loading

    :param data: location_data from `instagramy.InstagramLocation.location_data`
    """

    def __init__(self, data: dict):
        self.location_data = data


# Correctly spelled name for the class above. The misspelled
# `InstargramLocation` stays available so existing imports keep working.
InstagramLocation = InstargramLocation
def _pprint(data):
    """
    Print every key/value pair of `data` on its own line in two columns.

    Both columns are left-aligned and padded to 10 characters; `None` is
    printed as an empty cell.

    :param data: mapping of label to value
    """
    for key, value in data.items():
        # Convert to text before padding: the `<10` spec is a string spec and
        # raises TypeError for values such as lists or dicts.
        cell = "" if value is None else str(value)
        print("{:<10} {:<10} ".format(str(key), cell))


def _user(username):
    """ Fetch `username` from Instagram and return the fields shown by the CLI """
    user = InstagramUser(username)
    return {
        "Username": user.username,
        "Name": user.fullname,
        "Biography": user.biography,
        "Followers": user.number_of_followers,
        "Following": user.number_of_followings,
        "Posts": user.number_of_posts,
    }


def _post(post_id):
    """ Fetch the post `post_id` from Instagram and return the fields shown by the CLI """
    post = InstagramPost(post_id)
    return {
        "Post Id": post.post_id,
        "Author": post.author,
        "Likes": post.number_of_likes,
        "Comments": post.number_of_comments,
        "Date": post.upload_date,
        "Caption": post.caption,
    }


def _tag(tag):
    """ Fetch the hashtag `tag` from Instagram and return the fields shown by the CLI """
    t = InstagramHashTag(tag)
    return {"Hashtag": "#" + t.tagname, "Posts": t.number_of_posts}
class TestParsers(unittest.TestCase):
    """ Checks the manual-loading parsers against the stored sample payloads """

    def test_InstagramUser(self):
        """ Fields exposed by `InstagramUser` for the sample profile """
        profile = InstagramUser(user_data)
        self.assertEqual(profile.biography, "Built for developers.")
        self.assertIsNone(profile.connected_fb_page)
        for flag in ("followed_by_viewer", "follows_viewer"):
            self.assertFalse(getattr(profile, flag))
        self.assertEqual(profile.fullname, "GitHub")
        # NOTE(review): `has_blocked_viewer` is checked twice; the second
        # entry may have been meant for a different attribute.
        for flag in (
            "has_blocked_viewer",
            "has_country_block",
            "has_blocked_viewer",
            "is_blocked_by_viewer",
            "is_joined_recently",
            "is_private",
        ):
            self.assertFalse(getattr(profile, flag))
        self.assertTrue(profile.is_verified)
        for counter, expected in (
            ("no_of_mutual_follower", 0),
            ("number_of_followers", 139340),
            ("number_of_followings", 20),
            ("number_of_posts", 182),
        ):
            self.assertAlmostEqual(getattr(profile, counter), expected)
        self.assertEqual(profile.username, "github")

    def test_InstagramPost(self):
        """ Fields exposed by `InstagramPost` for the sample post """
        sample = InstagramPost(post_data)
        expected_caption = "Photo by CHILL MEMES in MRC Nagar with @memepattarai2.0. May be a meme of 3 people and text that says 'private Hospital nurse Chilll_ memes Govt Hospital nurse'."
        self.assertEqual(sample.author, "chilll_memes")
        self.assertEqual(sample.caption, expected_caption)
        self.assertEqual(sample.number_of_comments, 0)
        self.assertEqual(sample.number_of_likes, 21)

    def test_InstagramHashtag(self):
        """ Fields exposed by `InstagramHashTag` for the sample hashtag """
        hashtag = InstagramHashTag(tag_data)
        self.assertEqual(hashtag.number_of_posts, 3600401)
        self.assertEqual(hashtag.tagname, "python")

    def test_InstagramLocation(self):
        """ Fields exposed by `InstargramLocation` for the sample location """
        place = InstargramLocation(location_data)
        for axis, expected in (("latitude", 32.86367), ("longitude", -117.212101)):
            self.assertEqual(getattr(place, axis), expected)
        self.assertAlmostEqual(place.number_of_posts, 45580)
7 | 8 | Usage Example 9 | ------------- 10 | :: 11 | 12 | >>> from instagramy.InstagramHashtag import InstagramHashtag 13 | 14 | >>> tag = InstagramHashtag('python') 15 | >>> tag.number_of_posts 16 | >>> tag.top_posts 17 | 18 | """ 19 | 20 | from .core.parser import Parser 21 | from .core.parser import Viewer 22 | from .core.parser import TagParser 23 | from .core.exceptions import HashTagNotFound 24 | from .core.exceptions import RedirectionError 25 | from .core.exceptions import HTTPError 26 | from .core.cache import Cache 27 | from .core.requests import get 28 | 29 | 30 | class InstagramHashTag(TagParser): 31 | r""" 32 | Scrapes instagram hashtag information 33 | 34 | :param tag: Name of the Instagram Hashtag 35 | :param sessionid (optional): Session id of Instagram which is in browser cookies 36 | :param from_cache (optional): Get data from the cache of instagramy not from instagram 37 | 38 | >>> hashtag = InstagramHashTag("python") 39 | >>> hashtag.number_of_posts 40 | 3119668 41 | >>> instagram_user.posts_display_urls 42 | """ 43 | 44 | def __init__(self, tag: str, sessionid=None, from_cache=False): 45 | self.url = f"https://www.instagram.com/explore/tags/{tag}/" 46 | self.sessionid = sessionid 47 | cache = Cache("tag") 48 | if from_cache: 49 | if cache.is_exists(tag): 50 | self.tag_data = cache.read_cache(tag) 51 | else: 52 | data = self.get_json() 53 | cache.make_cache( 54 | tag, data["entry_data"]["TagPage"][0]["graphql"]["hashtag"] 55 | ) 56 | self.tag_data = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"] 57 | else: 58 | data = self.get_json() 59 | cache.make_cache( 60 | tag, data["entry_data"]["TagPage"][0]["graphql"]["hashtag"] 61 | ) 62 | try: 63 | self.tag_data = data["entry_data"]["TagPage"][0]["graphql"]["hashtag"] 64 | except KeyError: 65 | raise RedirectionError 66 | if sessionid: 67 | try: 68 | self.viewer = Viewer(data=data["config"]["viewer"]) 69 | except UnboundLocalError: 70 | self.viewer = None 71 | else: 72 | self.viewer = None 73 | 
class InstagramUser(UserParser):
    r"""
    Scrapes instagram user information.

    :param username: Username of the Instagram user
    :param sessionid (optional): Session id of Instagram which is in browser cookies
    :param from_cache (optional): Get data from the cache of instagramy not from instagram

    >>> instagram_user = InstagramUser("github")
    >>> instagram_user.is_verified
    True
    >>> instagram_user.biography
    'Built for developers.'
    """

    def __init__(self, username: str, sessionid=None, from_cache=False):
        """
        Load the profile from the cache or from Instagram.

        A freshly fetched profile is always written to the cache. `viewer`
        is only set when a `sessionid` is given and the page was actually
        fetched; otherwise it is None.

        :raises RedirectionError: if the fetched page has no profile payload
        :raises UsernameNotFound: if the request itself fails
        """

        self.url = f"https://www.instagram.com/{username}/"
        self.sessionid = sessionid
        cache = Cache("user")
        data = None  # stays None when the profile is served from the cache
        if from_cache and cache.is_exists(username):
            self.user_data = cache.read_cache(username)
        else:
            data = self.get_json()
            # Look the payload up once, inside the guard, before touching the
            # cache; previously the lookup for make_cache ran unguarded and a
            # redirect surfaced as a bare KeyError.
            try:
                self.user_data = data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
            except KeyError as err:
                raise RedirectionError from err
            cache.make_cache(username, self.user_data)
        if sessionid and data is not None:
            self.viewer = Viewer(data=data["config"]["viewer"])
        else:
            self.viewer = None

    def get_json(self) -> dict:
        """
        Get user information from Instagram

        :return: the data extracted from the profile page by `Parser`
        :raises UsernameNotFound: if the HTTP request fails
        """

        try:
            html = get(self.url, sessionid=self.sessionid)
        except HTTPError as err:
            # The url ends with "/", so the username is the second-to-last part.
            raise UsernameNotFound(self.url.split("/")[-2]) from err

        parser = Parser()
        parser.feed(html)
        return parser.Data

    def __str__(self) -> str:
        return f"{self.fullname} ({self.username}) -> {self.biography}"

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}('{self.username}')"
class InstagramLocation(LocationParser):
    r"""
    Scrapes instagram location information
    `https://www.instagram.com/explore/locations/{location_id}/{slug}`
    `https://www.instagram.com/explore/locations/977862530/mrc-nagar`

    :param location_id: Location id of the location
    :param slug: slug name of the location
    :param sessionid (optional): Session id of Instagram which is in browser cookies
    :param from_cache (optional): Get data from the cache of instagramy not from instagram

    >>> location = InstagramLocation('977862530', 'mrc-nagar')
    >>> location.number_of_posts
    3119668
    >>> location.address
    """

    def __init__(self, location_id: str, slug: str, sessionid=None, from_cache=False):
        """
        Load the location from the cache or from Instagram.

        A freshly fetched location is always written to the cache under the
        key `{location_id}_{slug}`. `viewer` is only set when a `sessionid`
        is given and the page was actually fetched; otherwise it is None.

        :raises RedirectionError: if the fetched page has no location payload
        :raises LocationNotFound: if the request itself fails
        """
        self.url = f"https://www.instagram.com/explore/locations/{location_id}/{slug}"
        self.sessionid = sessionid
        location = location_id + "_" + slug
        cache = Cache("location")
        data = None  # stays None when the location is served from the cache
        if from_cache and cache.is_exists(location):
            self.location_data = cache.read_cache(location)
        else:
            data = self.get_json()
            # Look the payload up once, inside the guard, before touching the
            # cache; previously the lookup for make_cache ran unguarded and a
            # redirect surfaced as a bare KeyError.
            try:
                self.location_data = data["entry_data"]["LocationsPage"][0]["graphql"]["location"]
            except KeyError as err:
                raise RedirectionError from err
            cache.make_cache(location, self.location_data)
        if sessionid and data is not None:
            self.viewer = Viewer(data=data["config"]["viewer"])
        else:
            self.viewer = None

    def get_json(self) -> dict:
        """
        Get Location information from Instagram

        :return: the data extracted from the location page by `Parser`
        :raises LocationNotFound: if the HTTP request fails
        """

        try:
            html = get(self.url, sessionid=self.sessionid)
        except HTTPError as err:
            # The url has no trailing "/": its last two parts are id and slug.
            parts = self.url.split("/")
            raise LocationNotFound(parts[-2] + "_" + parts[-1]) from err
        parser = Parser()
        parser.feed(html)
        return parser.Data

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}('{self.name}')"

    def __str__(self) -> str:
        return f"{self.name} has {self.number_of_posts} posts"
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at yogeshin247@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /instagramy/InstagramPost.py: -------------------------------------------------------------------------------- 1 | """ 2 | instagramy.InstagramPost 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | This module scrape Instagram Post data 6 | for given Instagram Post id. 
7 | 8 | Usage Example 9 | ------------- 10 | :: 11 | 12 | >>> from instagramy.InstagramHashtag import InstagramPost 13 | 14 | >>> post = InstagramPost('CGeYX2OA61s') 15 | >>> post.author 16 | >>> post.number_of_likes 17 | >>> post.number_of_comments 18 | 19 | """ 20 | 21 | from .core.parser import Viewer 22 | from .core.parser import Parser 23 | from .core.parser import PostParser 24 | from .core.exceptions import PostIdNotFound 25 | from .core.exceptions import RedirectionError 26 | from .core.exceptions import HTTPError 27 | from .core.cache import Cache 28 | from .core.requests import get 29 | 30 | 31 | class InstagramPost(PostParser): 32 | r""" 33 | Scrape the post information 34 | `https://www.instagram.com/p//` 35 | `https://www.instagram.com/p/CGeYX2OA61s/` 36 | 37 | :param post_id: Id of the Instagram post (From url of the post) 38 | :param sessionid (optional): Session id of Instagram which is in browser cookies 39 | :param from_cache (optional): Get data from the cache of instagramy not from instagram 40 | 41 | >>> post = InstagramPost("CGeYX2OA61s") 42 | >>> post.author 43 | '@virat.kohli' 44 | >>> post.number_of_likes 45 | 2203830 46 | >>> post.number_of_comments 47 | 4629 48 | """ 49 | 50 | def __init__(self, post_id: str, sessionid=None, from_cache=False): 51 | self.post_id = post_id 52 | self.url = f"https://www.instagram.com/p/{post_id}/" 53 | self.sessionid = sessionid 54 | cache = Cache("post") 55 | if from_cache: 56 | if cache.is_exists(post_id): 57 | self.post_data = cache.read_cache(post_id) 58 | else: 59 | data = self.get_json() 60 | cache.make_cache( 61 | post_id, 62 | data["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"], 63 | ) 64 | self.post_data = data["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"] 65 | else: 66 | data = self.get_json() 67 | cache.make_cache( 68 | post_id, data["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"] 69 | ) 70 | try: 71 | self.post_data = 
data["entry_data"]["PostPage"][0]["graphql"][ 72 | "shortcode_media" 73 | ] 74 | except KeyError: 75 | raise RedirectionError 76 | if sessionid: 77 | try: 78 | self.viewer = Viewer(data=data["config"]["viewer"]) 79 | except UnboundLocalError: 80 | self.viewer = None 81 | else: 82 | self.viewer = None 83 | 84 | def get_json(self) -> dict: 85 | """ Get post information from Instagram """ 86 | 87 | try: 88 | html = get(self.url, sessionid=self.sessionid) 89 | except HTTPError: 90 | raise PostIdNotFound(self.post_id) 91 | parser = Parser() 92 | parser.feed(html) 93 | info = parser.Data 94 | return info 95 | 96 | def __repr__(self) -> str: 97 | return f"{self.__class__.__name__}('{self.post_id}')" 98 | 99 | def __str__(self) -> str: 100 | return f"Post ({self.post_id}) posted by {self.author} with {self.number_of_likes} likes" 101 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Instagramy

4 |

Instagramy Logo

5 | 6 |

Python Package for Instagram Without Any external dependencies

7 | 8 | 9 | 10 |

11 | 12 | PyPi 13 | 14 | 15 | Downloads 16 | 17 | GitHub stars 18 | 19 | GitHub forks 20 | 21 | GitHub license 22 | 23 | 24 | Code style 25 | 26 | GitHub Repo size 27 | GitHub Actions 28 | GitHub Actions 29 | 30 |

31 | 32 | 33 | 34 |

35 | Scrape Instagram user information, post data, hashtag data and location data. This package scrapes the user's recent posts together with information such as likes, comments and captions. No external dependencies. 36 |

37 | 38 | 39 | 40 | ## Features 41 | 42 | - It scrapes most of the data of [Instagram user](#Instagram-User-details), [hashtags](#Instagram-Hashtag-details), [Posts](#Instagram-Post-details) and [Location](#Instagram-Location-details) 43 | - You can use this package [with login](#Sample-Usage) or [without login](#Use-Without-Login) 44 | - Download [Instagram post](#Plugins-for-Downloading-Posts) and [User profile picture](#Plugins-for-Downloading-Posts) 45 | - Has some [plugins](#Plugins) for data analysis 46 | - No external dependencies 47 | - Has a [caching feature](#Caching-Feature) 48 | - Lightweight 49 | - Easy to use 50 | 51 | 52 | 53 | ## Download 54 | 55 | ### Installation 56 | 57 | ```bash 58 | 59 | pip install instagramy 60 | 61 | ``` 62 | 63 | ### Upgrade 64 | 65 | ```bash 66 | 67 | pip install instagramy --upgrade 68 | 69 | ``` 70 | 71 | 72 | 73 | ## Sample Usage 74 | 75 | ### Getting Session Id of Instagram 76 | 77 | A session id is required to log in to Instagram via instagramy. No username or password is needed. You must be logged in to Instagram in a browser to get the session id. 78 | 79 | 1. Log in to Instagram in your default web browser 80 | 2. Open the browser's developer tools 81 | 3. Copy the sessionid 82 | - Go to Storage, then Cookies, and copy the sessionid (Firefox) 83 | - Go to Application, then Storage, then Cookies, and copy the sessionid (Chrome) 84 | 85 | **Note:** Check your session id frequently; Instagram may change it. 86 | 87 | 88 | 89 | ### Instagram User details 90 | 91 | Class `InstagramUser` scrapes some of the information related to an Instagram user. 92 | 93 | ```python 94 | >>> from instagramy import InstagramUser 95 | 96 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 97 | 98 | >>> user = InstagramUser('google', sessionid=session_id) 99 | 100 | >>> user.is_verified 101 | True 102 | 103 | >>> user.biography 104 | 'Google unfiltered—sometimes with filters.'
105 | 106 | >>> user.user_data # More data about user as dict 107 | ``` 108 | 109 | Once you have fetched a user's data, instagramy stores it in a cache file so that later lookups can avoid request errors. To load the data from that cache, pass `from_cache=True`; no sessionid is needed. 110 | 111 | ```python 112 | >>> from instagramy import InstagramUser 113 | 114 | >>> user = InstagramUser('google', from_cache=True) 115 | 116 | >>> user.is_verified 117 | True 118 | ``` 119 | 120 | This option is available for all classes: `InstagramUser`, `InstagramHashTag`, `InstagramPost` and `InstagramLocation`. 121 | 122 |
Show all Properties 123 |

124 | 125 | - biography 126 | - connected_fb_page 127 | - followed_by_viewer 128 | - follows_viewer 129 | - fullname 130 | - has_blocked_viewer 131 | - has_country_block 132 | - has_requested_viewer 133 | - is_blocked_by_viewer 134 | - is_joined_recently 135 | - is_private 136 | - is_verified 137 | - no_of_mutual_follower 138 | - number_of_followers 139 | - number_of_followings 140 | - number_of_posts 141 | - other_info 142 | - posts 143 | - posts_display_urls 144 | - profile_picture_url 145 | - requested_by_viewer 146 | - restricted_by_viewer 147 | - username 148 | - website 149 | 150 |

151 |
152 | 153 | `InstagramUser.user_data` contains more data than what is exposed as `Properties` 154 | 155 | ### Instagram Hashtag details 156 | 157 | Class `InstagramHashTag` scrapes some of the information related to an Instagram hashtag. 158 | 159 | You can also set your sessionid as an environment variable 160 | 161 | ```bash 162 | $ export SESSION_ID="38566737751%3Ah7JpgePGAoLxJe%er40q" 163 | ``` 164 | 165 | ```python 166 | >>> import os 167 | 168 | >>> from instagramy import InstagramHashTag 169 | 170 | >>> session_id = os.environ.get("SESSION_ID") 171 | 172 | >>> tag = InstagramHashTag('google', sessionid=session_id) 173 | 174 | >>> tag.number_of_posts 175 | 9556876 176 | 177 | >>> tag.tag_data # More data about hashtag as dict 178 | ``` 179 | 180 |
Show all Properties 181 |

182 | 183 | - number_of_posts 184 | - posts_display_urls 185 | - profile_pic_url 186 | - tagname 187 | - top_posts 188 | 189 |

190 |
191 | 192 | `InstagramHashTag.tag_data` contains more data than what is exposed as `Properties` 193 | 194 | ### Instagram Post details 195 | 196 | Class `InstagramPost` scrapes some of the information related to a particular Instagram post. It takes the post id as its parameter. You can get the post id from the URL of the Instagram post, or from the properties `InstagramUser.posts` or `InstagramHashTag.top_posts`. 197 | 198 | ```python 199 | >>> from instagramy import InstagramPost 200 | 201 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 202 | 203 | >>> post = InstagramPost('CLGkNCoJkcM', sessionid=session_id) 204 | 205 | >>> post.author 206 | 'ipadpograffiti' 207 | 208 | >>> post.number_of_likes 209 | 1439 210 | 211 | >>> post.post_data # More data about post as dict 212 | 213 | ``` 214 | 215 |
Show all Properties 216 |

217 | 218 | - author 219 | - caption 220 | - display_url 221 | - get_json 222 | - number_of_comments 223 | - number_of_likes 224 | - post_source 225 | - text 226 | - type_of_post 227 | - upload_time 228 | 229 |

230 |
231 | 232 | `InstagramPost.post_data` contains more data than what is exposed as `Properties` 233 | 234 | ### Instagram Location details 235 | 236 | Class `InstagramLocation` scrapes some of the information and posts related to the given location. It takes the location id and slug as parameters. You can get the location id and slug from the URL of the Instagram location or from the properties `InstagramPost.location.id` and `InstagramPost.location.slug`. 237 | 238 | ```python 239 | >>> from instagramy import InstagramPost 240 | 241 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 242 | 243 | >>> post = InstagramPost('CLGkNCoJkcM', sessionid=session_id) 244 | 245 | >>> location_id, slug = post.location.id, post.location.slug 246 | 247 | >>> from instagramy import InstagramLocation 248 | 249 | >>> location = InstagramLocation(location_id, slug, session_id) 250 | 251 | >>> location.latitude 252 | 28.6139 253 | 254 | >>> location.longitude 255 | 77.2089 256 | 257 | >>> location.address 258 | {'street_address': 'T2, Indira Gandhi International Airport', 'zip_code': '', 'city_name': 'New Delhi', 'region_name': '', 'country_code': 'IN', 'exact_city_match': False, 'exact_region_match': False, 'exact_country_match': False} 259 | ``` 260 | 261 | You can also get the location id and slug from the Instagram URL 262 | 263 | ```url 264 | https://www.instagram.com/explore/locations/977862530/mrc-nagar 265 | https://www.instagram.com/explore/locations/<location_id>/<slug> 266 | ``` 267 | 268 |
Show all Properties 269 |

270 | 271 | - address 272 | - id 273 | - latitude 274 | - location_data 275 | - longitude 276 | - name 277 | - number_of_posts 278 | - phone 279 | - profile_pic_url 280 | - sessionid 281 | - slug 282 | - top_posts 283 | - url 284 | - viewer 285 | - website 286 | 287 |

288 |
289 | 290 | `InstagramLocation.location_data` contains more data than what is exposed as `Properties` 291 | 292 | ### Plugins 293 | 294 | Instagramy has some plugins for convenience 295 | 296 | #### Plugins for Data Analysis 297 | 298 | - analyze_users_popularity 299 | - analyze_hashtags 300 | - analyze_user_recent_posts 301 | 302 | ```python 303 | >>> import pandas as pd 304 | >>> from instagramy.plugins.analysis import analyze_users_popularity 305 | 306 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 307 | 308 | >>> teams = ["chennaiipl", "mumbaiindians", 309 | "royalchallengersbangalore", "kkriders", 310 | "delhicapitals", "sunrisershyd", 311 | "kxipofficial"] 312 | >>> data = analyze_users_popularity(teams, session_id) 313 | >>> pd.DataFrame(data) 314 | 315 | Usernames Followers Following Posts 316 | 0 chennaiipl 6189292 194 5646 317 | 1 mumbaiindians 6244961 124 12117 318 | 2 royalchallengersbangalore 5430018 59 8252 319 | 3 kkriders 2204739 68 7991 320 | 4 delhicapitals 2097515 75 9522 321 | 5 sunrisershyd 2053824 70 6227 322 | 6 kxipofficial 1884241 67 7496 323 | ``` 324 | 325 | #### Plugins for Downloading Posts 326 | 327 | - download_hashtags_posts 328 | - download_post 329 | - download_profile_pic 330 | 331 | ```python 332 | >>> import os 333 | 334 | >>> from instagramy.plugins.download import * 335 | 336 | >>> session_id = os.environ.get('SESSION_ID') 337 | 338 | >>> download_profile_pic(username='google', sessionid=session_id, filepath='google.png') 339 | 340 | >>> download_post(id="ipadpograffiti", sessionid=session_id, filepath='post.mp4') 341 | 342 | >>> download_hashtags_posts(tag="tamil", session_id=session_id, count=2) 343 | ``` 344 | 345 | ### Use Without Login 346 | 347 | You can use this package without login. A sessionid is not required, but without one Instagram may raise a `RedirectionError` after four to five requests.
348 | 349 | ```python 350 | >>> from instagramy import * 351 | 352 | >>> user = InstagramUser('google') 353 | >>> user.fullname 354 | 'Google' 355 | >>> tag = InstagramHashTag('python') 356 | >>> tag.tag_data 357 | ``` 358 | 359 | ### Caching Feature 360 | 361 | Since version `4.3`, instagramy caches the data it fetches. Once you have fetched a user's data, instagramy stores it in a JSON cache file so that later lookups can avoid request errors. To read from the cache, pass the optional parameter `from_cache=True` instead of a sessionid. 362 | 363 | ```python 364 | >>> from instagramy import InstagramUser 365 | 366 | >>> user = InstagramUser('google', from_cache=True) 367 | 368 | >>> user.is_verified 369 | True 370 | ``` 371 | 372 | This option is available for all classes: `InstagramUser`, `InstagramHashTag`, `InstagramPost` and `InstagramLocation`. 373 | 374 | Clear all caches created by instagramy in the current directory with 375 | 376 | ```python 377 | >>> from instagramy.core.cache import clear_caches 378 | 379 | >>> clear_caches() # clear all caches of instagramy 380 | 381 | ``` 382 | 383 | List all cache files created by instagramy in the current directory with 384 | 385 | ```python 386 | >>> from instagramy import list_caches 387 | 388 | >>> list_caches() # list all caches of instagramy 389 | ``` 390 | 391 | ## Sample Scripts 392 | 393 | You can find some sample scripts [Here](https://yogeshwaran01.herokuapp.com/post/Instagramy-Python-Package-for-Instagram) 394 | 395 | 396 | 397 | ## ✏️ Important Notes 398 | 399 | - You can use this package without a sessionid (login), but it may raise `RedirectionError` after four to five requests. 400 | - Class `Viewer` provides the data about the currently logged-in user. 401 | - Check your session id frequently; Instagram may change it. 402 | - If code execution never completes, check and change your session id and try again. 403 | - Don't provide a wrong session_id.
404 | - `InstagramUser.user_data`, `InstagramPost.post_data`, `InstagramHashTag.tag_data` and `InstagramLocation.location_data` are Python `dict`s that contain more data than what is exposed as `Properties`. 405 | - This package does not scrape all the posts from an account; the limit is 12 posts (for non-private accounts). 406 | - This package does not scrape all the posts of a given hashtag; it only scrapes the top 60 - 72 posts. 407 | 408 | ## License 409 | 410 | [MIT License](https://github.com/yogeshwaran01/instagramy/blob/master/LICENSE.txt) 411 | 412 | ## Contributions 413 | 414 | Contributions are welcome. Feel free to report bugs in [issue](https://github.com/yogeshwaran01/instagramy/issues) and fix bugs by creating [pull requests](https://github.com/yogeshwaran01/instagramy/pulls). Comments, suggestions, improvements and enhancements are always welcome. Let's discuss it [Here](https://github.com/yogeshwaran01/instagramy/discussions/9). 415 | 416 |

Made with Python ❤️

417 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Instagramy

4 | 5 |

Python Package for Instagram Without Any external dependencies

6 | 7 | 8 | 9 |

10 | 11 | PyPi 12 | 13 | 14 | Downloads 15 | 16 | GitHub stars 17 | 18 | GitHub forks 19 | 20 | GitHub license 21 | 22 | 23 | Code style 24 | 25 | GitHub Repo size 26 | GitHub Actions 27 | GitHub Actions 28 | 29 |

30 | 31 | 32 | 33 |

34 | Scrape Instagram user information, post data, hashtag data and location data. This package scrapes the user's recent posts together with information such as likes, comments and captions. No external dependencies. 35 |

36 | 37 | 38 | 39 | ## Features 40 | 41 | - It scrapes most of the data of [Instagram user](#Instagram-User-details), [hashtags](#Instagram-Hashtag-details), [Posts](#Instagram-Post-details) and [Location](#Instagram-Location-details) 42 | - You can use this package [with login](#Sample-Usage) or [without login](#Use-Without-Login) 43 | - Download [Instagram post](#Plugins-for-Downloading-Posts) and [User profile picture](#Plugins-for-Downloading-Posts) 44 | - Has some [plugins](#Plugins) for data analysis 45 | - No external dependencies 46 | - Has a [caching feature](#Caching-Feature) 47 | - Lightweight 48 | - Easy to use 49 | 50 | 51 | 52 | ## Download 53 | 54 | ### Installation 55 | 56 | ```bash 57 | 58 | pip install instagramy 59 | 60 | ``` 61 | 62 | ### Upgrade 63 | 64 | ```bash 65 | 66 | pip install instagramy --upgrade 67 | 68 | ``` 69 | 70 | 71 | 72 | ## Sample Usage 73 | 74 | ### Getting Session Id of Instagram 75 | 76 | A session id is required to log in to Instagram via instagramy. No username or password is needed. You must be logged in to Instagram in a browser to get the session id. 77 | 78 | 1. Log in to Instagram in your default web browser 79 | 2. Open the browser's developer tools 80 | 3. Copy the sessionid 81 | - Go to Storage, then Cookies, and copy the sessionid (Firefox) 82 | - Go to Application, then Storage, then Cookies, and copy the sessionid (Chrome) 83 | 84 | **Note:** Check your session id frequently; Instagram may change it. 85 | 86 | 87 | 88 | ### Instagram User details 89 | 90 | Class `InstagramUser` scrapes some of the information related to an Instagram user. 91 | 92 | ```python 93 | >>> from instagramy import InstagramUser 94 | 95 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 96 | 97 | >>> user = InstagramUser('google', sessionid=session_id) 98 | 99 | >>> user.is_verified 100 | True 101 | 102 | >>> user.biography 103 | 'Google unfiltered—sometimes with filters.'
104 | 105 | >>> user.user_data # More data about user as dict 106 | ``` 107 | 108 | Once you have fetched a user's data, instagramy stores it in a cache file so that later lookups can avoid request errors. To load the data from that cache, pass `from_cache=True`; no sessionid is needed. 109 | 110 | ```python 111 | >>> from instagramy import InstagramUser 112 | 113 | >>> user = InstagramUser('google', from_cache=True) 114 | 115 | >>> user.is_verified 116 | True 117 | ``` 118 | 119 | This option is available for all classes: `InstagramUser`, `InstagramHashTag`, `InstagramPost` and `InstagramLocation`. 120 | 121 |
Show all Properties 122 |

123 | 124 | - biography 125 | - connected_fb_page 126 | - followed_by_viewer 127 | - follows_viewer 128 | - fullname 129 | - has_blocked_viewer 130 | - has_country_block 131 | - has_requested_viewer 132 | - is_blocked_by_viewer 133 | - is_joined_recently 134 | - is_private 135 | - is_verified 136 | - no_of_mutual_follower 137 | - number_of_followers 138 | - number_of_followings 139 | - number_of_posts 140 | - other_info 141 | - posts 142 | - posts_display_urls 143 | - profile_picture_url 144 | - requested_by_viewer 145 | - restricted_by_viewer 146 | - username 147 | - website 148 | 149 |

150 |
151 | 152 | `InstagramUser.user_data` contains more data than what is exposed as `Properties` 153 | 154 | ### Instagram Hashtag details 155 | 156 | Class `InstagramHashTag` scrapes some of the information related to an Instagram hashtag. 157 | 158 | You can also set your sessionid as an environment variable 159 | 160 | ```bash 161 | $ export SESSION_ID="38566737751%3Ah7JpgePGAoLxJe%er40q" 162 | ``` 163 | 164 | ```python 165 | >>> import os 166 | 167 | >>> from instagramy import InstagramHashTag 168 | 169 | >>> session_id = os.environ.get("SESSION_ID") 170 | 171 | >>> tag = InstagramHashTag('google', sessionid=session_id) 172 | 173 | >>> tag.number_of_posts 174 | 9556876 175 | 176 | >>> tag.tag_data # More data about hashtag as dict 177 | ``` 178 | 179 |
Show all Properties 180 |

181 | 182 | - number_of_posts 183 | - posts_display_urls 184 | - profile_pic_url 185 | - tagname 186 | - top_posts 187 | 188 |

189 |
190 | 191 | `InstagramHashTag.tag_data` contains more data than what is exposed as `Properties` 192 | 193 | ### Instagram Post details 194 | 195 | Class `InstagramPost` scrapes some of the information related to a particular Instagram post. It takes the post id as its parameter. You can get the post id from the URL of the Instagram post, or from the properties `InstagramUser.posts` or `InstagramHashTag.top_posts`. 196 | 197 | ```python 198 | >>> from instagramy import InstagramPost 199 | 200 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 201 | 202 | >>> post = InstagramPost('CLGkNCoJkcM', sessionid=session_id) 203 | 204 | >>> post.author 205 | 'ipadpograffiti' 206 | 207 | >>> post.number_of_likes 208 | 1439 209 | 210 | >>> post.post_data # More data about post as dict 211 | 212 | ``` 213 | 214 |
Show all Properties 215 |

216 | 217 | - author 218 | - caption 219 | - display_url 220 | - get_json 221 | - number_of_comments 222 | - number_of_likes 223 | - post_source 224 | - text 225 | - type_of_post 226 | - upload_time 227 | 228 |

229 |
230 | 231 | `InstagramPost.post_data` contains more data than what is exposed as `Properties` 232 | 233 | ### Instagram Location details 234 | 235 | Class `InstagramLocation` scrapes some of the information and posts related to the given location. It takes the location id and slug as parameters. You can get the location id and slug from the URL of the Instagram location or from the properties `InstagramPost.location.id` and `InstagramPost.location.slug`. 236 | 237 | ```python 238 | >>> from instagramy import InstagramPost 239 | 240 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 241 | 242 | >>> post = InstagramPost('CLGkNCoJkcM', sessionid=session_id) 243 | 244 | >>> location_id, slug = post.location.id, post.location.slug 245 | 246 | >>> from instagramy import InstagramLocation 247 | 248 | >>> location = InstagramLocation(location_id, slug, session_id) 249 | 250 | >>> location.latitude 251 | 28.6139 252 | 253 | >>> location.longitude 254 | 77.2089 255 | 256 | >>> location.address 257 | {'street_address': 'T2, Indira Gandhi International Airport', 'zip_code': '', 'city_name': 'New Delhi', 'region_name': '', 'country_code': 'IN', 'exact_city_match': False, 'exact_region_match': False, 'exact_country_match': False} 258 | ``` 259 | 260 | You can also get the location id and slug from the Instagram URL 261 | 262 | ```url 263 | https://www.instagram.com/explore/locations/977862530/mrc-nagar 264 | https://www.instagram.com/explore/locations/<location_id>/<slug> 265 | ``` 266 | 267 |
Show all Properties 268 |

269 | 270 | - address 271 | - id 272 | - latitude 273 | - location_data 274 | - longitude 275 | - name 276 | - number_of_posts 277 | - phone 278 | - profile_pic_url 279 | - sessionid 280 | - slug 281 | - top_posts 282 | - url 283 | - viewer 284 | - website 285 | 286 |

287 |
288 | 289 | `InstagramLocation.location_data` has more data other than defined as `Properties` 290 | 291 | ### Plugins 292 | 293 | Instagramy has some plugins for ease 294 | 295 | #### Plugins for Data Analyzing 296 | 297 | - analyze_users_popularity 298 | - analyze_hashtags 299 | - analyze_user_recent_posts 300 | 301 | ```python 302 | >>> import pandas as pd 303 | >>> from instagramy.plugins.analysis import analyze_users_popularity 304 | 305 | >>> session_id = "38566737751%3Ah7JpgePGAoLxJe%334" 306 | 307 | >>> teams = ["chennaiipl", "mumbaiindians", 308 | "royalchallengersbangalore", "kkriders", 309 | "delhicapitals", "sunrisershyd", 310 | "kxipofficial"] 311 | >>> data = analyze_users_popularity(teams, session_id) 312 | >>> pd.DataFrame(data) 313 | 314 | Usernames Followers Following Posts 315 | 0 chennaiipl 6189292 194 5646 316 | 1 mumbaiindians 6244961 124 12117 317 | 2 royalchallengersbangalore 5430018 59 8252 318 | 3 kkriders 2204739 68 7991 319 | 4 delhicapitals 2097515 75 9522 320 | 5 sunrisershyd 2053824 70 6227 321 | 6 kxipofficial 1884241 67 7496 322 | ``` 323 | 324 | #### Plugins for Downloading Posts 325 | 326 | - download_hashtags_posts 327 | - download_post 328 | - download_profile_pic 329 | 330 | ```python 331 | >>> import os 332 | 333 | >>> from instagramy.plugins.download import * 334 | 335 | >>> session_id = os.environ.get('SESSION_ID') 336 | 337 | >>> download_profile_pic(username='google', sessionid=session_id, filepath='google.png') 338 | 339 | >>> download_post(id="ipadpograffiti", sessionid=session_id, filepath='post.mp4') 340 | 341 | >>> download_hashtags_posts(tag="tamil", session_id=session_id, count=2) 342 | ``` 343 | 344 | ### Use Without Login 345 | 346 | You can use this package without login. Sessionid is not required but it may rise `RedirectionError` error after four to five requests. 
347 | 348 | ```python 349 | >>> from instagramy import * 350 | 351 | >>> user = InstagramUser('google') 352 | >>> user.fullname 353 | 'Google' 354 | >>> tag = InstagramHashTag('python') 355 | >>> tag.tag_data 356 | ``` 357 | 358 | ### Caching Feature 359 | 360 | Since version `4.3`, instagramy caches the data it fetches. Once you have fetched a user's data, instagramy stores it in a JSON cache file so that later lookups can avoid request errors. To read from the cache, pass the optional parameter `from_cache=True` instead of a sessionid. 361 | 362 | ```python 363 | >>> from instagramy import InstagramUser 364 | 365 | >>> user = InstagramUser('google', from_cache=True) 366 | 367 | >>> user.is_verified 368 | True 369 | ``` 370 | 371 | This option is available for all classes: `InstagramUser`, `InstagramHashTag`, `InstagramPost` and `InstagramLocation`. 372 | 373 | Clear all caches created by instagramy in the current directory with 374 | 375 | ```python 376 | >>> from instagramy.core.cache import clear_caches 377 | 378 | >>> clear_caches() # clear all caches of instagramy 379 | 380 | ``` 381 | 382 | List all cache files created by instagramy in the current directory with 383 | 384 | ```python 385 | >>> from instagramy import list_caches 386 | 387 | >>> list_caches() # list all caches of instagramy 388 | ``` 389 | 390 | ## Sample Scripts 391 | 392 | ### Getting Email address and phone number 393 | 394 | ```python 395 | user = InstagramUser('username') 396 | email, phone_number = user.user_data['business_email'], user.user_data['business_phone_number'] 397 | ``` 398 | 399 | ## ✏️ Important Notes 400 | 401 | - Don't send a huge number of requests to Instagram with a sessionid; Instagram may ban you. 402 | - You can use this package without a sessionid (login), but it may raise `RedirectionError` after four to five requests. 403 | - Class `Viewer` provides the data about the currently logged-in user.
404 | - Check your session id frequently; Instagram may change it. 405 | - If code execution never completes, check and change your session id and try again. 406 | - Don't provide a wrong session_id. 407 | - `InstagramUser.user_data`, `InstagramPost.post_data`, `InstagramHashTag.tag_data` and `InstagramLocation.location_data` are Python `dict`s that contain more data than what is exposed as `Properties`. 408 | - This package does not scrape all the posts from an account; the limit is 12 posts (for non-private accounts). 409 | - This package does not scrape all the posts of a given hashtag or location; it only scrapes the top 60 - 72 posts. 410 | 411 | 412 | 413 | ## Disclaimer 414 | 415 | If you send a huge number of requests to Instagram with a session id, Instagram may ban you. I am not responsible for any misuse or damage caused by this program. 416 | 417 | ## License 418 | 419 | [MIT License](https://github.com/yogeshwaran01/instagramy/blob/master/LICENSE.txt) 420 | 421 | ## Contributions 422 | 423 | Contributions are welcome. Feel free to report bugs in [issue](https://github.com/yogeshwaran01/instagramy/issues) and fix bugs by creating [pull requests](https://github.com/yogeshwaran01/instagramy/pulls). Comments, suggestions, improvements and enhancements are always welcome. Let's discuss it [Here](https://github.com/yogeshwaran01/instagramy/discussions/9). 424 | 425 |

Made with Python ❤️

# /instagramy/core/parser.py
""" Parsers for Instagramy """

import json
from collections import namedtuple
from datetime import datetime
from html.parser import HTMLParser
from typing import Optional

from .exceptions import RedirectionError
from .requests import get

# Sentinel distinguishing "key absent" from "key present with value None".
_MISSING = object()

# Record returned by `_nodes_classfier` (hashtag / location posts).
_MediaPost = namedtuple(
    "Post",
    [
        "likes",
        "comments",
        "is_video",
        "upload_time",
        "caption",
        "shortcode",
        "dimensions",
        "post_url",
        "display_url",
    ],
)

# Records returned by `UserParser.posts`.  Video posts carry two extra
# fields between `display_url` and `post_source`, exactly as before.
_USER_POST_HEAD = (
    "likes",
    "comments",
    "caption",
    "is_video",
    "timestamp",
    "location",
    "shortcode",
    "post_url",
    "display_url",
)
_USER_POST_TAIL = ("post_source", "taken_at_timestamp")
_UserPost = namedtuple("Post", _USER_POST_HEAD + _USER_POST_TAIL)
_UserVideoPost = namedtuple(
    "Post",
    _USER_POST_HEAD + ("video_url", "video_view_count") + _USER_POST_TAIL,
)

# Record returned by `PostParser.location`.
_Location = namedtuple("Location", ["Address", "id", "slug", "name"])


def _dig(container, *keys, default=None):
    """
    Follow `keys` through nested containers.

    Returns `default` as soon as a lookup raises KeyError or TypeError
    (missing key, or an intermediate value that is not subscriptable).
    """
    current = container
    try:
        for key in keys:
            current = current[key]
    except (KeyError, TypeError):
        return default
    return current


def _post_url(edge, suffix=""):
    """ Build the post url from the edge's shortcode, or None if absent """
    shortcode = _dig(edge, "node", "shortcode", default=_MISSING)
    if shortcode is _MISSING:
        return None
    return f"https://www.instagram.com/p/{shortcode}{suffix}"


def _taken_at(edge):
    """ `taken_at_timestamp` of the edge as a datetime, or None """
    try:
        return datetime.fromtimestamp(edge["node"]["taken_at_timestamp"])
    except (KeyError, TypeError):
        # TypeError also covers a timestamp that is present but None.
        return None


def _nodes_classfier(nodes: list) -> list:
    """
    Convert a list of media edges into a list of `Post` namedtuples.

    Every field is looked up under ``edge["node"]``; a field that is
    missing (or whose parent is not subscriptable) becomes None instead
    of raising.
    """
    return [
        _MediaPost(
            likes=_dig(edge, "node", "edge_liked_by", "count"),
            comments=_dig(edge, "node", "edge_media_to_comment", "count"),
            is_video=_dig(edge, "node", "is_video"),
            upload_time=_taken_at(edge),
            caption=_dig(edge, "node", "accessibility_caption"),
            shortcode=_dig(edge, "node", "shortcode"),
            # Previously guarded with (KeyError, IndexError), so a
            # non-subscriptable node crashed on this field only.
            dimensions=_dig(edge, "node", "dimensions"),
            post_url=_post_url(edge),
            display_url=_dig(edge, "node", "display_url"),
        )
        for edge in nodes
    ]


class Parser(HTMLParser):

    """
    Class Parse the Static Html of the Instagram
    website and return the required Data as
    Python Dict

    This Class Inherits html.parser.HTMLParser
    """

    # Class-level default kept so `Parser.Data` still resolves; every
    # instance gets its own dict in __init__ so state is never shared.
    Data = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Data = {}

    def handle_data(self, data):
        """
        Store the JSON assigned to `window._sharedData` in `self.Data`.

        Raises:
            RedirectionError: when the payload cannot be decoded as JSON.
        """
        if not data.startswith("window._sharedData"):
            return
        # Slice from the start of the JSON object and drop the final
        # character of the text node.
        payload = data[data.find('{"config"'): -1]
        try:
            self.Data = json.loads(payload)
        except (KeyError, json.JSONDecodeError) as err:
            raise RedirectionError from err


class Viewer:
    """
    User of Instagram currently Authenticated
    Parse the Current User data in Page

    Keyword Args:
        data: ready-made user data; used as-is when truthy.
        sessionid: passed to `get` to fetch https://instagram.com when
            `data` is not given.
    """

    def __init__(self, **kwargs):
        data = kwargs.get("data")
        if data:
            self.user_data = data
        else:
            sessionid = kwargs.get("sessionid")
            html = get("https://instagram.com", sessionid=sessionid)
            parser = Parser()
            parser.feed(html)
            self.user_data = parser.Data

    @property
    def username(self) -> str:
        """ Username of the given user """
        return self.user_data["username"]

    @property
    def fullname(self) -> str:
        """ Fullname of the given user """
        return self.user_data["full_name"]

    @property
    def biography(self) -> str:
        """ Biography of the given user """
        return self.user_data["biography"]

    @property
    def website(self) -> str:
        """ Website of the given user """
        return self.user_data["external_url"]

    @property
    def profile_picture_url(self) -> str:
        """ Profile picture url of the Given User """
        return self.user_data["profile_pic_url_hd"]

    @property
    def is_private(self) -> bool:
        """ Account type is Private """
        return self.user_data["is_private"]

    @property
    def is_joined_recently(self) -> bool:
        """ is user joined recently """
        return self.user_data["is_joined_recently"]

    @property
    def is_professional_account(self) -> bool:
        """ is user account a professional account """
        return self.user_data["is_professional_account"]

    def __str__(self) -> str:
        return f"{self.fullname} ({self.username}) -> {self.biography}"

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}('{self.username}')"


class UserParser:
    """
    Parse the required data of user store as property

    Every property reads `self.user_data`, which this class does not
    set itself.
    """

    @property
    def username(self) -> str:
        """ Username of the given user """
        return self.user_data["username"]

    @property
    def fullname(self) -> str:
        """ Fullname of the given user """
        return self.user_data["full_name"]

    @property
    def biography(self) -> str:
        """ Biography of the given user """
        return self.user_data["biography"]

    @property
    def website(self) -> str:
        """ Website of the given user """
        return self.user_data["external_url"]

    @property
    def number_of_followers(self) -> int:
        """ No.of Followers of the given user """
        return self.user_data["edge_followed_by"]["count"]

    @property
    def number_of_followings(self) -> int:
        """ No.of Following of the given user """
        return self.user_data["edge_follow"]["count"]

    @property
    def number_of_posts(self) -> int:
        """ No.of Post of the given user """
        return self.user_data["edge_owner_to_timeline_media"]["count"]

    @property
    def profile_picture_url(self) -> str:
        """ Profile picture url of the Given User """
        return self.user_data["profile_pic_url_hd"]

    @property
    def is_verified(self) -> bool:
        """ Verification status of the user """
        return self.user_data["is_verified"]

    @property
    def is_private(self) -> bool:
        """ Account type is Private """
        return self.user_data["is_private"]

    @property
    def posts(self) -> list:
        """
        Top 12 posts data of the given user

        Returns a list of `Post` namedtuples. Fields missing from an
        edge are None. Video posts additionally carry `video_url` and
        `video_view_count`, and their `post_source` is the video url;
        for other posts `post_source` is the display url.
        """
        posts_lists = []
        edges = self.user_data["edge_owner_to_timeline_media"]["edges"]
        for edge in edges:
            is_video = _dig(edge, "node", "is_video")
            display_url = _dig(edge, "node", "display_url")
            common = (
                _dig(edge, "node", "edge_liked_by", "count"),
                _dig(edge, "node", "edge_media_to_comment", "count"),
                _dig(edge, "node", "accessibility_caption"),
                is_video,
                _dig(edge, "node", "taken_at_timestamp"),
                _dig(edge, "node", "location"),
                _dig(edge, "node", "shortcode"),
                _post_url(edge, "/"),
                display_url,
            )
            taken_at = _taken_at(edge)
            if is_video:
                # These lookups used to be unguarded and raised KeyError
                # on incomplete nodes; they now degrade to None like
                # every other field.
                video_url = _dig(edge, "node", "video_url")
                view_count = _dig(edge, "node", "video_view_count")
                post = _UserVideoPost(
                    *common, video_url, view_count, video_url, taken_at
                )
            else:
                post = _UserPost(*common, display_url, taken_at)
            posts_lists.append(post)
        return posts_lists

    @property
    def posts_display_urls(self) -> list:
        """
        Top 12 posts picture url of the given user
        """

        return [post.display_url for post in self.posts]

    @property
    def is_joined_recently(self) -> bool:
        """ Is user joined recently """
        return self.user_data["is_joined_recently"]

    @property
    def other_info(self) -> dict:
        """
        Other information about user
        """
        return {
            "is_private": self.user_data["is_private"],
            "is_verified": self.user_data["is_verified"],
            "is_business_account": self.user_data["is_business_account"],
            "is_joined_recently": self.user_data["is_joined_recently"],
            "has_ar_effects": self.user_data["has_ar_effects"],
            "has_clips": self.user_data["has_clips"],
            "has_guides": self.user_data["has_guides"],
            "has_channel": self.user_data["has_channel"],
            "highlight_reel_count": self.user_data["highlight_reel_count"],
        }

    @property
    def follows_viewer(self) -> bool:
        """ Is user follows the Viewer """
        return self.user_data["follows_viewer"]

    @property
    def has_blocked_viewer(self) -> bool:
        """ Is user blocked the Viewer """
        return self.user_data["has_blocked_viewer"]

    @property
    def no_of_mutual_follower(self) -> int:
        """ No of Mutual Followers """
        return self.user_data["edge_mutual_followed_by"]["count"]

    @property
    def requested_by_viewer(self) -> bool:
        """ Is viewer requested to follow user """
        return self.user_data["requested_by_viewer"]

    @property
    def is_blocked_by_viewer(self) -> bool:
        """ Is Viewer blocked the User """
        return self.user_data["blocked_by_viewer"]

    @property
    def restricted_by_viewer(self) -> bool:
        """ Is Viewer restricted the User """
        return self.user_data["restricted_by_viewer"]

    @property
    def has_country_block(self) -> bool:
        """ Is country blocked the User """
        return self.user_data["country_block"]

    @property
    def followed_by_viewer(self) -> bool:
        """ Is Viewer Follows the User """
        return self.user_data["followed_by_viewer"]

    @property
    def has_requested_viewer(self) -> bool:
        """ Is User requested the Viewer """
        return self.user_data["has_requested_viewer"]

    @property
    def connected_fb_page(self) -> bool:
        """ Connected Facebook page of User """
        return self.user_data["connected_fb_page"]


class PostParser:
    """
    Parse the required data of post store as property

    Every property reads `self.post_data`, which this class does not
    set itself.
    """

    @property
    def type_of_post(self) -> str:
        """ Type of the Post"""
        return self.post_data["__typename"]

    @property
    def display_url(self) -> str:
        """ Display url of the Image/Video """
        return self.post_data["display_url"]

    @property
    def upload_time(self) -> datetime:
        """ Upload Datetime of the Post """
        return datetime.fromtimestamp(self.post_data["taken_at_timestamp"])

    @property
    def number_of_likes(self) -> int:
        """ No.of Like is given post """
        return int(self.post_data["edge_media_preview_like"]["count"])

    @property
    def number_of_comments(self) -> int:
        """ No.of Comments is given post """
        return int(self.post_data["edge_media_to_parent_comment"]["count"])

    @property
    def author(self) -> str:
        """ Author of the Post """
        return self.post_data["owner"]["username"]

    @property
    def caption(self) -> str:
        """ Caption of the Post """
        return self.post_data["accessibility_caption"]

    @property
    def post_source(self) -> str:
        """ Post Image/Video Link """
        if self.post_data["is_video"]:
            return self.post_data["video_url"]
        return self.display_url

    @property
    def text(self) -> Optional[str]:
        """ Text of the first caption edge, or None when there is none """
        try:
            edges = self.post_data["edge_media_to_caption"]["edges"]
            return edges[0]["node"]["text"]
        except (KeyError, IndexError):
            return None

    @property
    def location(self) -> Optional[_Location]:
        """
        Location of the Post as a `Location` namedtuple
        (Address, id, slug, name), or None when the post has no location
        """
        location_data = self.post_data["location"]
        if not location_data:
            return None
        return _Location(
            location_data["address_json"],
            location_data["id"],
            location_data["slug"],
            location_data["name"],
        )


class TagParser:
    """
    Parse the required data of tag store as property

    Every property reads `self.tag_data`, which this class does not
    set itself.
    """

    @property
    def tagname(self) -> str:
        """ Tagname of the Hashtag """
        return self.tag_data["name"]

    @property
    def profile_pic_url(self) -> str:
        """ Profile picture url of the Hashtag """
        return self.tag_data["profile_pic_url"]

    @property
    def number_of_posts(self) -> int:
        """ No.of posts in given Hashtag """
        return self.tag_data["edge_hashtag_to_media"]["count"]

    @property
    def top_posts(self) -> list:
        """
        Top post data (<70) in the given Hashtag
        """
        nodes = self.tag_data["edge_hashtag_to_media"]["edges"]
        return _nodes_classfier(nodes)

    @property
    def posts_display_urls(self) -> list:
        """
        Display urls of the top posts (<70) in the given Hashtag
        """
        # `top_posts` yields namedtuples, which cannot be indexed with a
        # string key; attribute access is required.
        return [post.display_url for post in self.top_posts]


class LocationParser:
    """
    Parse the required data of location store as property

    Every property reads `self.location_data`, which this class does
    not set itself.
    """

    @property
    def id(self) -> str:
        """ Location id of the location """
        return self.location_data["id"]

    @property
    def name(self) -> str:
        """ Name of the location """
        return self.location_data["name"]

    @property
    def latitude(self) -> float:
        """ Latitude of the location """
        return self.location_data["lat"]

    @property
    def longitude(self) -> float:
        """ Longitude of the location """
        return self.location_data["lng"]

    @property
    def slug(self) -> str:
        """ Slug of the location """
        return self.location_data["slug"]

    @property
    def website(self) -> str:
        """ Website of the location """
        return self.location_data["website"]

    @property
    def phone(self) -> str:
        """ Phone Number of the location """
        return self.location_data["phone"]

    @property
    def address(self) -> dict:
        """ Address of the location, decoded from its JSON string """
        return json.loads(self.location_data["address_json"])

    @property
    def profile_pic_url(self) -> str:
        """ Profile Picture of the location """
        return self.location_data["profile_pic_url"]

    @property
    def number_of_posts(self) -> int:
        """ Number of post in the location """
        return self.location_data["edge_location_to_media"]["count"]

    @property
    def top_posts(self) -> list:
        """
        Top post data (<70) in the given Location

        Posts from `edge_location_to_media` come first, followed by
        those from `edge_location_to_top_posts`.
        """
        nodes_1 = self.location_data["edge_location_to_media"]["edges"]
        nodes_2 = self.location_data["edge_location_to_top_posts"]["edges"]

        return _nodes_classfier(nodes_1) + _nodes_classfier(nodes_2)