├── .github
│   ├── FUNDING.yml
│   └── header.svg
├── .gitignore
├── LICENSE
├── README.md
├── api
│   └── api.py
├── main.py
└── requirements.txt

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------

custom: ["https://paypal.me/yassertahiri"]

--------------------------------------------------------------------------------
/.github/header.svg:
--------------------------------------------------------------------------------

[SVG banner with the text "SoIG"; the markup was stripped when this dump was generated.]

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2020 Yasser Tahiri

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

![SoIG](.github/header.svg)

# SoIG

SoIG is an Instagram OSINT tool that collects a range of information about an Instagram account, beyond what you can normally see by just looking at the profile.

## Getting Started

**The information includes**:

- **profile**: username, profile name, URL, followers, following, number of posts, bio, profile picture URL, whether it is a business account, whether it is connected to a Facebook account, external URL, whether it joined recently, business category name, whether it is private, and whether it is verified.

- **tags**: the most used tags by default, and all used tags with `-t` (a sketch of the tag counting follows this list).

- **posts**: accessibility caption, location, timestamp, whether comments are disabled, caption, and picture URL.
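The tag statistics come from pulling hashtags out of post captions and counting them. Here is a minimal, self-contained sketch of that logic, mirroring the `extract_hash_tags` and `collections.Counter` code in `api/api.py` (the captions below are made-up stand-ins for scraped post data):

```python
import collections

def extract_hash_tags(caption):
    # Keep every whitespace-separated token that starts with '#', minus the '#'.
    return [part[1:] for part in caption.split() if part.startswith("#")]

# Hypothetical captions standing in for scraped posts.
captions = [
    "sunset at the beach #travel #sunset",
    "coffee time #coffee #morning #travel",
    "weekend hike #travel #nature",
]

tags = []
for caption in captions:
    tags.extend(extract_hash_tags(caption))

# Most common first; the tool prints the top 5 by default and all of them with -t.
for tag, count in collections.Counter(tags).most_common():
    print(f"{tag} : {count}")
```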
### Prerequisites

- Python 3.
- A Linux OS, e.g. Ubuntu or Kali Linux.

### Project setup

```sh
# clone the repo
$ git clone https://github.com/yezz123/SoIG

# move to the project folder
$ cd SoIG
```

### Creating virtual environment

- Create a virtual environment using virtualenv.

```shell
# creating virtual environment
$ virtualenv venv

# activate virtual environment
$ source venv/bin/activate

# install all dependencies
$ pip install -r requirements.txt
```

### Running the Application

- You can run the `main.py` file manually.

```sh
# Running the Script (a username is required)
$ python3 main.py -u <username>
```

## Usage

An overview of the command-line options used to run SoIG, each with a description.

| Usage                                | Description                                               |
| ------------------------------------ | --------------------------------------------------------- |
| `-h`, `--help`                       | show the help message and exit.                           |
| `-u USERNAME`, `--username USERNAME` | username of the account to scan.                          |
| `-p`, `--postscrap`                  | scrape info about all uploaded images.                    |
| `-s`, `--savedata`                   | save data to file (profile pic, profile info, post info). |
| `-t`, `--tagscrap`                   | list all tags the account has used.                       |
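For example (`instagram` is just a placeholder username, not a suggested target):

```sh
# print profile info and the most used tags
$ python3 main.py -u instagram

# also scrape post info and save everything into ./instagram/
$ python3 main.py -u instagram -p -s
```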
## Contributing

- Contributions are welcome: if you have an enhancement or add-on that makes the project better or more secure, or that helps users use it in a simpler way, feel free to contribute.

### Disclaimer

- This project may only be used for educational purposes. Using this software against target systems without prior permission is illegal, and the author is not responsible for any damages caused by misuse of this software.

## License

This project is licensed under the terms of the [MIT license](LICENSE).

--------------------------------------------------------------------------------
/api/api.py:
--------------------------------------------------------------------------------

#!/usr/bin/env python3

import time
import json
import random
import os
import sys
import requests
import collections
import urllib.request
from bs4 import BeautifulSoup

# Compatibility shim: collections.Callable was removed in Python 3.10,
# but older beautifulsoup4 releases still reference it.
collections.Callable = collections.abc.Callable

# ANSI color codes
nu = "\033[0m"
re = "\033[1;31m"
gr = "\033[1;32m"
cy = "\033[1;36m"

raw_tags = []
tag_lis = []

useragent = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14",
    "Mozilla/5.0 (Linux; U; Android-4.0.3; en-us; Galaxy Nexus Build/IML74K) AppleWebKit/535.7 (KHTML, like Gecko) CrMo/16.0.912.75 Mobile Safari/535.7",
]


class extra:
    @staticmethod
    def tiny_url(url):
        # Shorten a URL via the TinyURL API.
        apiurl = "http://tinyurl.com/api-create.php?url="
        tinyurl = urllib.request.urlopen(apiurl + url).read()
        return tinyurl.decode("utf-8")

    @staticmethod
    def write(in_text):
        # Print text with a typewriter effect.
        for char in in_text:
            time.sleep(0.1)
            sys.stdout.write(char)
            sys.stdout.flush()

    @staticmethod
    def extract_hash_tags(stri):
        # Return every '#tag' token in the string, without the '#'.
        return [part[1:] for part in stri.split() if part.startswith("#")]

    @staticmethod
    def banner():
        print(
            f"""{cy}
╔═╗ ╔═╗ ╦ ╔═╗
╚═╗ ║ ║ ║ ║ ╦
╚═╝ ╚═╝ {gr}o{cy} ╩ ╚═╝

{gr}Coded By :
{gr}Yezz123{nu}
"""
        )


class main:
    def __init__(self, user):
        self.user = user
        self.get_profile()

    def get_profile(self):
        extra.write(f"\n{gr}[+]{nu} getting profile ...")
        profile = requests.get(
            f"https://www.instagram.com/{self.user}",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
            },
        )
        soup = BeautifulSoup(profile.text, "html.parser")
        # The profile data lives in an inline <script> tag as `window._sharedData = {...};`.
        # Slice off the 21-character assignment prefix and the trailing semicolon,
        # then parse the JSON. The blob is expected in the script tag at index 3.
        more_data = soup.find_all("script", attrs={"type": "text/javascript"})
        self.data = json.loads(more_data[3].get_text()[21:].strip(";"))
        self.p_data = self.data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
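        # Illustrative shape of the parsed payload (inferred from the keys used
        # below; the exact field set is an assumption, not a documented schema):
        # {"entry_data": {"ProfilePage": [{"graphql": {"user": {
        #     "username": ..., "full_name": ..., "biography": ...,
        #     "edge_followed_by": {"count": ...}, "edge_follow": {"count": ...},
        #     "edge_owner_to_timeline_media": {"count": ..., "edges": [...]},
        #     ...}}}]}}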
        self.output = {
            "username ": str(self.p_data["username"]),
            "name ": str(self.p_data["full_name"]),
            "url ": str(f"instagram.com/{self.p_data['username']}"),
            "followers ": str(self.p_data["edge_followed_by"]["count"]),
            "following ": str(self.p_data["edge_follow"]["count"]),
            "posts ": str(self.p_data["edge_owner_to_timeline_media"]["count"]),
            "bio ": str(self.p_data["biography"].replace("\n", ", ")),
            "external url ": str(self.p_data["external_url"]),
            "private ": str(self.p_data["is_private"]),
            "verified ": str(self.p_data["is_verified"]),
            "profile pic url ": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account ": str(self.p_data["is_business_account"]),
            "connected to fb ": str(self.p_data["connected_fb_page"]),
            "joined recently ": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }

        # Default to an empty tag count so later calls don't break on private profiles.
        self.tags = {}

        if str(self.p_data["is_private"]).lower() == "true":
            print(f"{re}[!]{gr} private profile, can't scrape data!\n")
            return 1
        # Collect hashtags from every available post caption.
        for post in self.p_data["edge_owner_to_timeline_media"]["edges"]:
            try:
                raw_tags.append(
                    extra.extract_hash_tags(
                        post["node"]["edge_media_to_caption"]["edges"][0]["node"]["text"]
                    )
                )
            except IndexError:
                # Post has no caption.
                pass
        for tags in raw_tags:
            tag_lis.extend(tags)
        self.tags = dict(collections.Counter(tag_lis))

        return self.tags

    def print_data_(self):
        self.print_profile()
        print(f"{gr}[+]{nu} most used user tags : \n")
        # Show only the five most common tags.
        for key, value in collections.Counter(self.tags).most_common(5):
            print(f"{gr}{key} : {nu}{value}")
        print("")

    def print_data(self):
        self.print_profile()

    def print_profile(self):
        os.system("clear")
        extra.banner()
        for key, value in self.output.items():
            print(f"{gr}{key} : {nu}{value}")
        print("")

    def make_dir(self):
        # Create (if needed) and enter a per-user output directory.
        try:
            os.mkdir(self.user)
        except FileExistsError:
            pass
        os.chdir(self.user)

    def scrap_uploads(self):
        if self.output["private "].lower() == "true":
            print(f"{re}[!]{gr} private profile, can't scrape data!\n")
            return 1
        posts = {}
        print(f"{gr}[+]{nu} user uploads data : \n")
        for index, post in enumerate(
            self.p_data["edge_owner_to_timeline_media"]["edges"]
        ):
            # Get the picture URL and shorten it.
            print(
                f"{gr}picture : {nu}{extra.tiny_url(str(post['node']['thumbnail_resources'][0]['src']))}"
            )
            # If the picture has no caption, skip it; otherwise print it.
            try:
                print(
                    f"{gr}Caption : {nu}{post['node']['edge_media_to_caption']['edges'][0]['node']['text']}"
                )
            except IndexError:
                pass
            posts[index] = {
                "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                "comments disabled": str(post["node"]["comments_disabled"]),
                "timestamp": str(post["node"]["taken_at_timestamp"]),
                "likes": str(post["node"]["edge_liked_by"]["count"]),
                "location": str(post["node"]["location"]),
                "accessibility caption": str(post["node"]["accessibility_caption"]),
            }

            for key, value in posts[index].items():
                print(f"{gr}{key} : {nu}{value}")
            print("")

    def most_common_tags(self):
        print(f"{gr}[+]{nu} user uploads tags : \n")
        for key, value in collections.Counter(self.tags).most_common():
            print(f"{gr}{key} : {nu}{value}")

    def save_data(self):
        self.make_dir()
        # Download the profile picture.
        with open("profile_pic.jpg", "wb") as f:
            time.sleep(1)
            r = requests.get(
                self.output["profile pic url "],
                headers={"User-Agent": random.choice(useragent)},
            )
            f.write(r.content)
        print(f"{gr}[+]{nu} saved pic to {os.getcwd()}/profile_pic.jpg")

        # Save profile data to a text file.
        self.output_data = {
            "username": str(self.p_data["username"]),
            "name": str(self.p_data["full_name"]),
            "url": str(f"instagram.com/{self.p_data['username']}"),
            "followers": str(self.p_data["edge_followed_by"]["count"]),
            "following": str(self.p_data["edge_follow"]["count"]),
            "posts": str(self.p_data["edge_owner_to_timeline_media"]["count"]),
            "bio": str(self.p_data["biography"]),
            "external url": str(self.p_data["external_url"]),
            "private": str(self.p_data["is_private"]),
            "verified": str(self.p_data["is_verified"]),
            "profile pic url": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account": str(self.p_data["is_business_account"]),
            "connected to fb": str(self.p_data["connected_fb_page"]),
            "joined recently": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }
        with open("profile_data.txt", "w") as f:
            f.write(json.dumps(self.output_data))
        print(f"{gr}[+]{nu} saved data to {os.getcwd()}/profile_data.txt")

        # Save post information.
        posts = {}
        for index, post in enumerate(
            self.p_data["edge_owner_to_timeline_media"]["edges"]
        ):
            posts[index] = {
                "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                "comments disabled": str(post["node"]["comments_disabled"]),
                "timestamp": str(post["node"]["taken_at_timestamp"]),
                "likes": str(post["node"]["edge_liked_by"]["count"]),
                "location": str(post["node"]["location"]),
                "accessibility caption": str(post["node"]["accessibility_caption"]),
                "picture": extra.tiny_url(
                    str(post["node"]["thumbnail_resources"][0]["src"])
                ),
            }

            try:
                posts[index]["caption"] = str(
                    post["node"]["edge_media_to_caption"]["edges"][0]["node"]["text"]
                )
            except IndexError:
                # Post has no caption.
                pass

        with open("posts_data.txt", "w") as f:
            f.write(json.dumps(posts))
        print(f"{gr}[+]{nu} saved post info to {os.getcwd()}/posts_data.txt")

        # Save tags.
        with open("tags.txt", "w") as f:
            f.write(json.dumps(tag_lis))
        print(f"{gr}[+]{nu} saved tags to {os.getcwd()}/tags.txt\n")

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------

#!/usr/bin/env python3

import os
import argparse
from api.api import main

ap = argparse.ArgumentParser()
ap.add_argument("-u", "--username", required=True,
                help="username of account to scan")
ap.add_argument("-p", "--postscrap", action="store_true",
                help="scrape all uploaded images info")
ap.add_argument("-s", "--savedata", action="store_true",
                help="save data to file")
ap.add_argument("-t", "--tagscrap", action="store_true",
                help="list all used tags")
args = vars(ap.parse_args())

os.system("clear" if os.name == "posix" else "cls")
ig = main(user=args["username"])
if args["tagscrap"]:
    # -t: print the profile, then every tag the account has used.
    ig.print_data()
    ig.most_common_tags()
else:
    # Default: print the profile plus the five most used tags.
    ig.print_data_()

if args["postscrap"]:
    ig.scrap_uploads()

if args["savedata"]:
    ig.save_data()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

beautifulsoup4==4.6.0
certifi==2022.12.7
charset-normalizer==2.0.3
idna==3.2
requests==2.26.0
urllib3==1.26.6

--------------------------------------------------------------------------------