├── .github
│   ├── FUNDING.yml
│   └── header.svg
├── .gitignore
├── LICENSE
├── README.md
├── api
│   └── api.py
├── main.py
└── requirements.txt

--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------

custom: ["https://paypal.me/yassertahiri"]

--------------------------------------------------------------------------------
/.github/header.svg:
--------------------------------------------------------------------------------

[SVG banner with the text "SoIG"; the markup was stripped when this dump was generated.]

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2020 Yasser Tahiri

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

![SoIG](.github/header.svg)

# SoIG

SoIG is an Instagram OSINT tool that collects a range of information about an Instagram account, beyond what you can normally see by just looking at the profile.

## Getting Started

**The information includes**:

- **profile**: username, profile name, URL, followers, following, number of posts, bio, profile picture URL, whether it is a business account, whether it is connected to a Facebook account, external URL, whether it joined recently, business category name, whether it is private, and whether it is verified.

- **tags**: the most used tags by default, and all used tags with `-t` (a sketch of the tag counting follows this list).

- **posts**: accessibility caption, location, timestamp, whether comments are disabled, caption, and picture URL.
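The tag statistics come from pulling hashtags out of post captions and counting them. Here is a minimal, self-contained sketch of that logic, mirroring the `extract_hash_tags` and `collections.Counter` code in `api/api.py` (the captions below are made-up stand-ins for scraped post data):

```python
import collections

def extract_hash_tags(caption):
    # Keep every whitespace-separated token that starts with '#', minus the '#'.
    return [part[1:] for part in caption.split() if part.startswith("#")]

# Hypothetical captions standing in for scraped posts.
captions = [
    "sunset at the beach #travel #sunset",
    "coffee time #coffee #morning #travel",
    "weekend hike #travel #nature",
]

tags = []
for caption in captions:
    tags.extend(extract_hash_tags(caption))

# Most common first; the tool prints the top 5 by default and all of them with -t.
for tag, count in collections.Counter(tags).most_common():
    print(f"{tag} : {count}")
```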
### Prerequisites

- Python 3.
- A Linux OS, e.g. Ubuntu or Kali Linux.

### Project setup

```sh
# clone the repo
$ git clone https://github.com/yezz123/SoIG

# move to the project folder
$ cd SoIG
```

### Creating virtual environment

- Create a virtual environment using virtualenv.

```shell
# creating virtual environment
$ virtualenv venv

# activate virtual environment
$ source venv/bin/activate

# install all dependencies
$ pip install -r requirements.txt
```

### Running the Application

- You can run the `main.py` file manually.

```sh
# Running the Script (a username is required)
$ python3 main.py -u <username>
```

## Usage

An overview of the command-line options used to run SoIG, each with a description.

| Usage                                | Description                                               |
| ------------------------------------ | --------------------------------------------------------- |
| `-h`, `--help`                       | show the help message and exit.                           |
| `-u USERNAME`, `--username USERNAME` | username of the account to scan.                          |
| `-p`, `--postscrap`                  | scrape info about all uploaded images.                    |
| `-s`, `--savedata`                   | save data to file (profile pic, profile info, post info). |
| `-t`, `--tagscrap`                   | list all tags the account has used.                       |
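For example (`instagram` is just a placeholder username, not a suggested target):

```sh
# print profile info and the most used tags
$ python3 main.py -u instagram

# also scrape post info and save everything into ./instagram/
$ python3 main.py -u instagram -p -s
```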
## Contributing

- Contributions are welcome: if you have an enhancement or add-on that makes the project better or more secure, or that helps users use it in a simpler way, feel free to contribute.

### Disclaimer

- This project may only be used for educational purposes. Using this software against target systems without prior permission is illegal, and the author is not responsible for any damages caused by misuse of this software.

## License

This project is licensed under the terms of the [MIT license](LICENSE).

--------------------------------------------------------------------------------
/api/api.py:
--------------------------------------------------------------------------------

#!/usr/bin/env python3

import time
import json
import random
import os
import sys
import requests
import collections
import urllib.request
from bs4 import BeautifulSoup

# Compatibility shim: collections.Callable was removed in Python 3.10,
# but older beautifulsoup4 releases still reference it.
collections.Callable = collections.abc.Callable

# ANSI color codes
nu = "\033[0m"
re = "\033[1;31m"
gr = "\033[1;32m"
cy = "\033[1;36m"

raw_tags = []
tag_lis = []

useragent = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36",
    "Mozilla/5.0 (iPad; CPU OS 8_4_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H321 Safari/600.1.4",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14",
    "Mozilla/5.0 (Linux; U; Android-4.0.3; en-us; Galaxy Nexus Build/IML74K) AppleWebKit/535.7 (KHTML, like Gecko) CrMo/16.0.912.75 Mobile Safari/535.7",
]


class extra:
    @staticmethod
    def tiny_url(url):
        # Shorten a URL via the TinyURL API.
        apiurl = "http://tinyurl.com/api-create.php?url="
        tinyurl = urllib.request.urlopen(apiurl + url).read()
        return tinyurl.decode("utf-8")

    @staticmethod
    def write(in_text):
        # Print text with a typewriter effect.
        for char in in_text:
            time.sleep(0.1)
            sys.stdout.write(char)
            sys.stdout.flush()

    @staticmethod
    def extract_hash_tags(stri):
        # Return every '#tag' token in the string, without the '#'.
        return [part[1:] for part in stri.split() if part.startswith("#")]

    @staticmethod
    def banner():
        print(
            f"""{cy}
╔═╗ ╔═╗ ╦ ╔═╗
╚═╗ ║ ║ ║ ║ ╦
╚═╝ ╚═╝ {gr}o{cy} ╩ ╚═╝

{gr}Coded By :
{gr}Yezz123{nu}
"""
        )


class main:
    def __init__(self, user):
        self.user = user
        self.get_profile()

    def get_profile(self):
        extra.write(f"\n{gr}[+]{nu} getting profile ...")
        profile = requests.get(
            f"https://www.instagram.com/{self.user}",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
            },
        )
        soup = BeautifulSoup(profile.text, "html.parser")
        # The profile data lives in an inline <script> tag as `window._sharedData = {...};`.
        # Slice off the 21-character assignment prefix and the trailing semicolon,
        # then parse the JSON. The blob is expected in the script tag at index 3.
        more_data = soup.find_all("script", attrs={"type": "text/javascript"})
        self.data = json.loads(more_data[3].get_text()[21:].strip(";"))
        self.p_data = self.data["entry_data"]["ProfilePage"][0]["graphql"]["user"]
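        # Illustrative shape of the parsed payload (inferred from the keys used
        # below; the exact field set is an assumption, not a documented schema):
        # {"entry_data": {"ProfilePage": [{"graphql": {"user": {
        #     "username": ..., "full_name": ..., "biography": ...,
        #     "edge_followed_by": {"count": ...}, "edge_follow": {"count": ...},
        #     "edge_owner_to_timeline_media": {"count": ..., "edges": [...]},
        #     ...}}}]}}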
        self.output = {
            "username ": str(self.p_data["username"]),
            "name ": str(self.p_data["full_name"]),
            "url ": str(f"instagram.com/{self.p_data['username']}"),
            "followers ": str(self.p_data["edge_followed_by"]["count"]),
            "following ": str(self.p_data["edge_follow"]["count"]),
            "posts ": str(self.p_data["edge_owner_to_timeline_media"]["count"]),
            "bio ": str(self.p_data["biography"].replace("\n", ", ")),
            "external url ": str(self.p_data["external_url"]),
            "private ": str(self.p_data["is_private"]),
            "verified ": str(self.p_data["is_verified"]),
            "profile pic url ": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account ": str(self.p_data["is_business_account"]),
            "connected to fb ": str(self.p_data["connected_fb_page"]),
            "joined recently ": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }

        # Default to an empty tag count so later calls don't break on private profiles.
        self.tags = {}

        if str(self.p_data["is_private"]).lower() == "true":
            print(f"{re}[!]{gr} private profile, can't scrape data!\n")
            return 1
        # Collect hashtags from every available post caption.
        for post in self.p_data["edge_owner_to_timeline_media"]["edges"]:
            try:
                raw_tags.append(
                    extra.extract_hash_tags(
                        post["node"]["edge_media_to_caption"]["edges"][0]["node"]["text"]
                    )
                )
            except IndexError:
                # Post has no caption.
                pass
        for tags in raw_tags:
            tag_lis.extend(tags)
        self.tags = dict(collections.Counter(tag_lis))

        return self.tags

    def print_data_(self):
        self.print_profile()
        print(f"{gr}[+]{nu} most used user tags : \n")
        # Show only the five most common tags.
        for key, value in collections.Counter(self.tags).most_common(5):
            print(f"{gr}{key} : {nu}{value}")
        print("")

    def print_data(self):
        self.print_profile()

    def print_profile(self):
        os.system("clear")
        extra.banner()
        for key, value in self.output.items():
            print(f"{gr}{key} : {nu}{value}")
        print("")

    def make_dir(self):
        # Create (if needed) and enter a per-user output directory.
        try:
            os.mkdir(self.user)
        except FileExistsError:
            pass
        os.chdir(self.user)

    def scrap_uploads(self):
        if self.output["private "].lower() == "true":
            print(f"{re}[!]{gr} private profile, can't scrape data!\n")
            return 1
        posts = {}
        print(f"{gr}[+]{nu} user uploads data : \n")
        for index, post in enumerate(
            self.p_data["edge_owner_to_timeline_media"]["edges"]
        ):
            # Get the picture URL and shorten it.
            print(
                f"{gr}picture : {nu}{extra.tiny_url(str(post['node']['thumbnail_resources'][0]['src']))}"
            )
            # If the picture has no caption, skip it; otherwise print it.
            try:
                print(
                    f"{gr}Caption : {nu}{post['node']['edge_media_to_caption']['edges'][0]['node']['text']}"
                )
            except IndexError:
                pass
            posts[index] = {
                "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                "comments disabled": str(post["node"]["comments_disabled"]),
                "timestamp": str(post["node"]["taken_at_timestamp"]),
                "likes": str(post["node"]["edge_liked_by"]["count"]),
                "location": str(post["node"]["location"]),
                "accessibility caption": str(post["node"]["accessibility_caption"]),
            }

            for key, value in posts[index].items():
                print(f"{gr}{key} : {nu}{value}")
            print("")

    def most_common_tags(self):
        print(f"{gr}[+]{nu} user uploads tags : \n")
        for key, value in collections.Counter(self.tags).most_common():
            print(f"{gr}{key} : {nu}{value}")

    def save_data(self):
        self.make_dir()
        # Download the profile picture.
        with open("profile_pic.jpg", "wb") as f:
            time.sleep(1)
            r = requests.get(
                self.output["profile pic url "],
                headers={"User-Agent": random.choice(useragent)},
            )
            f.write(r.content)
        print(f"{gr}[+]{nu} saved pic to {os.getcwd()}/profile_pic.jpg")

        # Save profile data to a text file.
        self.output_data = {
            "username": str(self.p_data["username"]),
            "name": str(self.p_data["full_name"]),
            "url": str(f"instagram.com/{self.p_data['username']}"),
            "followers": str(self.p_data["edge_followed_by"]["count"]),
            "following": str(self.p_data["edge_follow"]["count"]),
            "posts": str(self.p_data["edge_owner_to_timeline_media"]["count"]),
            "bio": str(self.p_data["biography"]),
            "external url": str(self.p_data["external_url"]),
            "private": str(self.p_data["is_private"]),
            "verified": str(self.p_data["is_verified"]),
            "profile pic url": extra.tiny_url(str(self.p_data["profile_pic_url_hd"])),
            "business account": str(self.p_data["is_business_account"]),
            "connected to fb": str(self.p_data["connected_fb_page"]),
            "joined recently": str(self.p_data["is_joined_recently"]),
            "business category": str(self.p_data["business_category_name"]),
        }
        with open("profile_data.txt", "w") as f:
            f.write(json.dumps(self.output_data))
        print(f"{gr}[+]{nu} saved data to {os.getcwd()}/profile_data.txt")

        # Save post information.
        posts = {}
        for index, post in enumerate(
            self.p_data["edge_owner_to_timeline_media"]["edges"]
        ):
            posts[index] = {
                "comments": str(post["node"]["edge_media_to_comment"]["count"]),
                "comments disabled": str(post["node"]["comments_disabled"]),
                "timestamp": str(post["node"]["taken_at_timestamp"]),
                "likes": str(post["node"]["edge_liked_by"]["count"]),
                "location": str(post["node"]["location"]),
                "accessibility caption": str(post["node"]["accessibility_caption"]),
                "picture": extra.tiny_url(
                    str(post["node"]["thumbnail_resources"][0]["src"])
                ),
            }

            try:
                posts[index]["caption"] = str(
                    post["node"]["edge_media_to_caption"]["edges"][0]["node"]["text"]
                )
            except IndexError:
                # Post has no caption.
                pass

        with open("posts_data.txt", "w") as f:
            f.write(json.dumps(posts))
        print(f"{gr}[+]{nu} saved post info to {os.getcwd()}/posts_data.txt")

        # Save tags.
        with open("tags.txt", "w") as f:
            f.write(json.dumps(tag_lis))
        print(f"{gr}[+]{nu} saved tags to {os.getcwd()}/tags.txt\n")

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------

#!/usr/bin/env python3

import os
import argparse
from api.api import main

ap = argparse.ArgumentParser()
ap.add_argument("-u", "--username", required=True,
                help="username of account to scan")
ap.add_argument("-p", "--postscrap", action="store_true",
                help="scrape all uploaded images info")
ap.add_argument("-s", "--savedata", action="store_true",
                help="save data to file")
ap.add_argument("-t", "--tagscrap", action="store_true",
                help="list all used tags")
args = vars(ap.parse_args())

os.system("clear" if os.name == "posix" else "cls")
ig = main(user=args["username"])
if args["tagscrap"]:
    # -t: print the profile, then every tag the account has used.
    ig.print_data()
    ig.most_common_tags()
else:
    # Default: print the profile plus the five most used tags.
    ig.print_data_()

if args["postscrap"]:
    ig.scrap_uploads()

if args["savedata"]:
    ig.save_data()

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

beautifulsoup4==4.6.0
certifi==2022.12.7
charset-normalizer==2.0.3
idna==3.2
requests==2.26.0
urllib3==1.26.6

--------------------------------------------------------------------------------