├── .github └── FUNDING.yml ├── LICENSE ├── README.md ├── api.php └── insta_data_scrap.class.php /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 https://github.com/iamatulsingh - Atul Singh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Insta Data Scrap 2 | [![built with PHP8](https://img.shields.io/badge/built%20with-PHP8-blue.svg)](https://www.php.net/)
3 | Insta Data Scrap is an API to scrape some details of any Instagram user by their username. Using this API you can scrape some details even if the account is in private mode. It doesn't need the official Instagram API or an access token to fetch data. 4 | 5 | >New code added to get data by calling the API (this unofficial one).
6 | >NOTE: allow_url_fopen must be enabled in php.ini 7 | ## How to use it, using API call? 8 | Example: 9 | 10 | Just fetch the API using ```http://domain.com/api.php?u=username```. 11 | Fetch hashtag data using the API ```http://domain.com/api.php?hashtag=tag```. 12 | 13 | Combined API call: ```http://domain.com/api.php?u=username&hashtag=cristiano```. 14 | 15 | ## How to use it, using Class import? 16 | Example: 17 | ``` 18 | require_once('insta_data_scrap.class.php'); 19 | $username = "username"; 20 | $insta = new InstaData(); 21 | $userDetails = $insta->getUserDetails($username); 22 | $accountDetails = $insta->getAccountDetails($username); 23 | $userData = json_decode($userDetails,true); 24 | $accountData = json_decode($accountDetails,true); 25 | $timeLine = $insta->getTimeLine($username); 26 | ``` 27 | 28 | ## How to get hashtag data 29 | 30 | ``` 31 | $hashtag_likes = $insta->getTagLikes('photooftheday'); // here 'photooftheday' is hashtag string 32 | $hashtag_details = $insta->getTagData('photooftheday'); 33 | 34 | echo $hashtag_likes . "

"; 35 | $count = $hashtag_details['count']; 36 | $hastagData = $hashtag_details['data']; 37 | 38 | for($i=0;$i<$count;$i++){ 39 | echo $hastagData[$i]['hashtag_img'] . "
"; 40 | echo $hastagData[$i]['hashtag_txt'] . "
"; 41 | echo $hastagData[$i]['hashtag_time'] . "
"; 42 | } 43 | ``` 44 | 45 | ## Use this to print details 46 | ``` 47 | 48 | $count = $timeLine['count']; 49 | $timeLineData = $timeLine['data']; 50 | 51 | for($i=0;$i<$count;$i++){ 52 | echo $timeLineData[$i]['post_img'] . "
"; 53 | echo $timeLineData[$i]['post_txt'] . "
"; 54 | echo $timeLineData[$i]['post_time'] . "
"; 55 | echo $timeLineData[$i]['post_likes'] . "
"; 56 | echo $timeLineData[$i]['post_comments'] . "
"; 57 | } 58 | 59 | print_r($userData); 60 | print_r($accountData); 61 | ``` 62 | 63 | ## You can print UserData and AccountData in readable format using below code 64 | ``` 65 | echo $userData['img']; 66 | echo $userData['full_name']; 67 | echo $userData['username']; 68 | echo $userData['is_verified']; 69 | echo $userData['id']; 70 | echo $userData['instaUrl']; 71 | echo $accountData['followers']; 72 | echo $accountData['follow']; 73 | echo $accountData['posts']; 74 | ``` 75 | -------------------------------------------------------------------------------- /api.php: -------------------------------------------------------------------------------- 1 | getUserDetails($username); 24 | $accountDetails = $insta->getAccountDetails($username); 25 | $timeLine = $insta->getTimeLine($username); 26 | // $tagLikes = $insta->getTagLikes($username); 27 | 28 | // create single json array with all data 29 | error_reporting(~E_WARNING); 30 | $instaData = array("userDetails"=>$userDetails, "accountDetails"=>$accountDetails, "timeLineData"=>$timeLine, 31 | "hashTagLikes"=>$tagLikes, "tagData"=>$tagData); 32 | echo json_encode($instaData, JSON_PRETTY_PRINT); 33 | 34 | } 35 | if($_GET['hashtag'] != ""){ 36 | $insta = new InstaData(); 37 | $hastag = $_GET['hashtag']; 38 | $tagData = $insta->getTagData($hastag); 39 | // echo $tagData; 40 | echo json_encode($tagData, JSON_PRETTY_PRINT); 41 | } 42 | else{ 43 | echo ''; 44 | } 45 | 46 | class InstaData{ 47 | 48 | public function getData($username){ 49 | $options = array('http' => array('user_agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36' 50 | ) 51 | ); 52 | 53 | $context = stream_context_create($options); 54 | error_reporting(~E_WARNING); 55 | if(($instaLink = file_get_contents('https://www.instagram.com/' . 
$username, false, $context)) == false){ 56 | echo "Error: Link not found, user-id is invalid"; 57 | exit; 58 | } 59 | 60 | return $instaLink; 61 | } 62 | 63 | public function fetchUserDetails($username){ 64 | 65 | $instaLink = $this->getData($username); 66 | $instaIDPattern = '/window._sharedData = (.*)/'; 67 | if (!preg_match($instaIDPattern, $instaLink, $matches)) { 68 | exit; 69 | } 70 | $trim_data = substr($matches[1], 0, -10); 71 | $json_output = json_decode($trim_data,true); 72 | $json_output = $json_output['entry_data']['ProfilePage']['0']['graphql']['user']; 73 | return $json_output; 74 | } 75 | 76 | public function fetchAccountDetails($username){ 77 | 78 | $details = ""; 79 | $instaLink = $this->getData($username); 80 | $detailsPattern = '/meta content=(.\d+)(.*)/'; 81 | if (preg_match($detailsPattern, $instaLink, $res)) { 82 | if (strpos($res[0], 'Followers') !== false) { 83 | $details = $res[1]. "" .$res[2]; 84 | } 85 | } 86 | $input_line = substr($details, 1, -23); 87 | $userDetails = preg_split("/, /", $input_line); 88 | 89 | return $userDetails; 90 | } 91 | 92 | public function getTimeLine($username){ 93 | $instaLink = $this->getData($username); 94 | $instaIDPattern = '/window._sharedData = (.*)/'; 95 | if (!preg_match($instaIDPattern, $instaLink, $matches)) { 96 | exit; 97 | } 98 | $trim_data = substr($matches[1], 0, -10); 99 | $json_output = json_decode($trim_data,true); 100 | $json_output = $json_output['entry_data']['ProfilePage']['0']['graphql']['user']['edge_owner_to_timeline_media']['edges']; 101 | $count = count($json_output); 102 | $timeLine = Array(); 103 | for($i=0;$i<$count;$i++){ 104 | error_reporting(~E_NOTICE); 105 | $post_txt = $json_output[$i]['node']['edge_media_to_caption']['edges'] ? 
$json_output[$i]['node']['edge_media_to_caption']['edges']['0']['node']['text'] : ""; 106 | $post_img = $json_output[$i]['node']['display_url']; 107 | $post_likes = $json_output[$i]['node']['edge_liked_by']['count']; 108 | $post_comments = $json_output[$i]['node']['edge_media_to_comment']['count']; 109 | $post_time = $json_output[$i]['node']['taken_at_timestamp']; 110 | $date = new DateTime("@$post_time"); 111 | $timeLine[$i]['post_img'] = $post_img; 112 | $timeLine[$i]['post_txt'] = $post_txt; 113 | $timeLine[$i]['post_time'] = $date->format('Y-m-d H:i:s'); 114 | $timeLine[$i]['post_likes'] = $post_likes; 115 | $timeLine[$i]['post_comments'] = $post_comments; 116 | } 117 | return Array('data'=>$timeLine,'count'=>$count); 118 | } 119 | 120 | public function getUserDetails($username){ 121 | 122 | $json_output = $this->fetchUserDetails($username); 123 | $userData = array(); 124 | $userData['img'] = $json_output['profile_pic_url_hd']; 125 | $userData['full_name'] = $json_output['full_name']; 126 | $userData['username'] = $json_output['username']; 127 | $userData['is_verified'] = "false"; 128 | if($json_output['is_verified']) 129 | $userData['is_verified'] = "true"; 130 | else 131 | $userData['is_verified'] = "false"; 132 | $userData['id'] = $json_output['id']; 133 | $userData['instaUrl'] = "https://instagram.com/".$json_output['username']; 134 | // $json_userData = json_encode($userData); 135 | 136 | // return $json_userData; 137 | return $userData; 138 | } 139 | 140 | public function getAccountDetails($username){ 141 | 142 | $userDetails = $this->fetchAccountDetails($username); 143 | $accountData = array(); 144 | $accountData['followers'] = $userDetails[0]; 145 | $accountData['follow'] = $userDetails[1]; 146 | $temp = preg_split("/ -/", $userDetails[2]); 147 | $accountData['posts'] = $temp[0]; 148 | // $json_accountData = json_encode($accountData); 149 | 150 | // return $json_accountData; 151 | return $accountData; 152 | } 153 | 154 | 155 | // New code added here 156 
| 157 | public function getHashTageData($hashtag){ 158 | $ch = curl_init(); 159 | 160 | curl_setopt($ch, CURLOPT_URL, 'https://i.instagram.com/api/v1/tags/logged_out_web_info/?tag_name=' . $hashtag); 161 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 162 | curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET'); 163 | 164 | curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate'); 165 | 166 | $headers = array(); 167 | $headers[] = 'Authority: i.instagram.com'; 168 | $headers[] = 'Accept: */*'; 169 | $headers[] = 'Accept-Language: en-US,en;q=0.9'; 170 | $headers[] = 'Cookie: csrftoken=AtXwOU4HakeAhVPX5ymMG5NerRCINGvg; mid=YuqMTwAEAAFwCGMwsQpx3HuH-L1n; ig_did=2A6AF10E-74DE-4398-A1E6-5AD079D49602; dpr=2; datr=Y4zqYg2zy9EU5ozONcNPvGYL'; 171 | $headers[] = 'Origin: https://www.instagram.com'; 172 | $headers[] = 'Referer: https://www.instagram.com/'; 173 | $headers[] = 'Sec-Ch-Ua: \".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"'; 174 | $headers[] = 'Sec-Ch-Ua-Mobile: ?0'; 175 | $headers[] = 'Sec-Ch-Ua-Platform: \"Linux\"'; 176 | $headers[] = 'Sec-Fetch-Dest: empty'; 177 | $headers[] = 'Sec-Fetch-Mode: cors'; 178 | $headers[] = 'Sec-Fetch-Site: same-site'; 179 | $headers[] = 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'; 180 | $headers[] = 'X-Asbd-Id: 198387'; 181 | $headers[] = 'X-Csrftoken: AtXwOU4HakeAhVPX5ymMG5NerRCINGvg'; 182 | $headers[] = 'X-Ig-App-Id: 936619743392459'; 183 | $headers[] = 'X-Ig-Www-Claim: 0'; 184 | curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 185 | 186 | $hashtagData = curl_exec($ch); 187 | if (curl_errno($ch)) { 188 | echo "Error: Hash tag value is not available for " . $hashtag; 189 | echo 'Error:' . 
curl_error($ch); 190 | exit; 191 | } 192 | curl_close($ch); 193 | 194 | return $hashtagData; 195 | } 196 | 197 | public function getTagLikes($hashtag){ 198 | $instaHashtag = $this->getHashTageData($hashtag); 199 | $json_output = json_decode($instaHashtag,true); 200 | $likes = $json_output['graphql']['hashtag']['edge_hashtag_to_media']['count']; 201 | return $likes; 202 | } 203 | 204 | public function getTagData($hashtag){ 205 | $instaHashtag = $this->getHashTageData($hashtag); 206 | echo $instaHashtag; 207 | $json_output = json_decode($instaHashtag,true); 208 | $json_output = $json_output['graphql']['hashtag']['edge_hashtag_to_media']['edges']; 209 | $count = count($json_output); 210 | $hashtag_data = Array(); 211 | for($i=0;$i<$count;$i++){ 212 | error_reporting(~E_NOTICE); 213 | $txt = $json_output[$i]['node']['edge_media_to_caption']['edges']['0']['node']['text']; 214 | $post_img = $json_output[$i]['node']['display_url']; 215 | $hashtag_time = $json_output[$i]['node']['taken_at_timestamp']; 216 | $date = new DateTime("@$hashtag_time"); 217 | $hashtag_data[$i]['hashtag_img'] = $post_img; 218 | $hashtag_data[$i]['hashtag_txt'] = $txt; 219 | $hashtag_data[$i]['hashtag_time'] = $date->format('Y-m-d H:i:s'); 220 | } 221 | return Array('data'=>$hashtag_data,'count'=>$count); 222 | } 223 | 224 | } 225 | 226 | 227 | ?> 228 | -------------------------------------------------------------------------------- /insta_data_scrap.class.php: -------------------------------------------------------------------------------- 1 | array('user_agent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36' 17 | ) 18 | ); 19 | 20 | $context = stream_context_create($options); 21 | error_reporting(~E_WARNING); 22 | if(($instaLink = file_get_contents('https://www.instagram.com/' . 
$username, false, $context)) == false){ 23 | echo "Error: Link not found, user-id is invalid"; 24 | exit; 25 | } 26 | 27 | return $instaLink; 28 | } 29 | 30 | public function fetchUserDetails($username){ 31 | 32 | $instaLink = $this->getData($username); 33 | $instaIDPattern = '/window._sharedData = (.*)/'; 34 | if (!preg_match($instaIDPattern, $instaLink, $matches)) { 35 | exit; 36 | } 37 | $trim_data = substr($matches[1], 0, -10); 38 | $json_output = json_decode($trim_data,true); 39 | $json_output = $json_output['entry_data']['ProfilePage']['0']['graphql']['user']; 40 | return $json_output; 41 | } 42 | 43 | public function fetchAccountDetails($username){ 44 | 45 | $details = ""; 46 | $instaLink = $this->getData($username); 47 | $detailsPattern = '/meta content=(.\d+)(.*)/'; 48 | if (preg_match($detailsPattern, $instaLink, $res)) { 49 | if (strpos($res[0], 'Followers') !== false) { 50 | $details = $res[1]. "" .$res[2]; 51 | } 52 | } 53 | $input_line = substr($details, 1, -23); 54 | $userDetails = preg_split("/, /", $input_line); 55 | 56 | return $userDetails; 57 | } 58 | 59 | public function getTimeLine($username){ 60 | $instaLink = $this->getData($username); 61 | $instaIDPattern = '/window._sharedData = (.*)/'; 62 | if (!preg_match($instaIDPattern, $instaLink, $matches)) { 63 | exit; 64 | } 65 | $trim_data = substr($matches[1], 0, -10); 66 | $json_output = json_decode($trim_data,true); 67 | $json_output = $json_output['entry_data']['ProfilePage']['0']['graphql']['user']['edge_owner_to_timeline_media']['edges']; 68 | $count = count($json_output); 69 | $timeLine = Array(); 70 | for($i=0;$i<$count;$i++){ 71 | error_reporting(~E_NOTICE); 72 | $post_txt = $json_output[$i]['node']['edge_media_to_caption']['edges'] ? 
$json_output[$i]['node']['edge_media_to_caption']['edges']['0']['node']['text'] : ""; 73 | $post_img = $json_output[$i]['node']['display_url']; 74 | $post_likes = $json_output[$i]['node']['edge_liked_by']['count']; 75 | $post_comments = $json_output[$i]['node']['edge_media_to_comment']['count']; 76 | $post_time = $json_output[$i]['node']['taken_at_timestamp']; 77 | $date = new DateTime("@$post_time"); 78 | $timeLine[$i]['post_img'] = $post_img; 79 | $timeLine[$i]['post_txt'] = $post_txt; 80 | $timeLine[$i]['post_time'] = $date->format('Y-m-d H:i:s'); 81 | $timeLine[$i]['post_likes'] = $post_likes; 82 | $timeLine[$i]['post_comments'] = $post_comments; 83 | } 84 | return Array('data'=>$timeLine,'count'=>$count); 85 | } 86 | 87 | public function getUserDetails($username){ 88 | 89 | $json_output = $this->fetchUserDetails($username); 90 | $userData = array(); 91 | $userData['img'] = $json_output['profile_pic_url_hd']; 92 | $userData['full_name'] = $json_output['full_name']; 93 | $userData['username'] = $json_output['username']; 94 | $userData['is_verified'] = "false"; 95 | if($json_output['is_verified']) 96 | $userData['is_verified'] = "true"; 97 | else 98 | $userData['is_verified'] = "false"; 99 | $userData['id'] = $json_output['id']; 100 | $userData['instaUrl'] = "https://instagram.com/".$json_output['username']; 101 | // $json_userData = json_encode($userData); 102 | 103 | // return $json_userData; 104 | return $userData; 105 | } 106 | 107 | public function getAccountDetails($username){ 108 | 109 | $userDetails = $this->fetchAccountDetails($username); 110 | $accountData = array(); 111 | $accountData['followers'] = $userDetails[0]; 112 | $accountData['follow'] = $userDetails[1]; 113 | $temp = preg_split("/ -/", $userDetails[2]); 114 | $accountData['posts'] = $temp[0]; 115 | // $json_accountData = json_encode($accountData); 116 | 117 | // return $json_accountData; 118 | return $accountData; 119 | } 120 | 121 | 122 | // New code added here 123 | 124 | public function 
getHashTageData($hashtag){ 125 | $ch = curl_init(); 126 | 127 | curl_setopt($ch, CURLOPT_URL, 'https://i.instagram.com/api/v1/tags/logged_out_web_info/?tag_name=' . $hashtag); 128 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 129 | curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET'); 130 | 131 | curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate'); 132 | 133 | $headers = array(); 134 | $headers[] = 'Authority: i.instagram.com'; 135 | $headers[] = 'Accept: */*'; 136 | $headers[] = 'Accept-Language: en-US,en;q=0.9'; 137 | $headers[] = 'Cookie: csrftoken=AtXwOU4HakeAhVPX5ymMG5NerRCINGvg; mid=YuqMTwAEAAFwCGMwsQpx3HuH-L1n; ig_did=2A6AF10E-74DE-4398-A1E6-5AD079D49602; dpr=2; datr=Y4zqYg2zy9EU5ozONcNPvGYL'; 138 | $headers[] = 'Origin: https://www.instagram.com'; 139 | $headers[] = 'Referer: https://www.instagram.com/'; 140 | $headers[] = 'Sec-Ch-Ua: \".Not/A)Brand\";v=\"99\", \"Google Chrome\";v=\"103\", \"Chromium\";v=\"103\"'; 141 | $headers[] = 'Sec-Ch-Ua-Mobile: ?0'; 142 | $headers[] = 'Sec-Ch-Ua-Platform: \"Linux\"'; 143 | $headers[] = 'Sec-Fetch-Dest: empty'; 144 | $headers[] = 'Sec-Fetch-Mode: cors'; 145 | $headers[] = 'Sec-Fetch-Site: same-site'; 146 | $headers[] = 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'; 147 | $headers[] = 'X-Asbd-Id: 198387'; 148 | $headers[] = 'X-Csrftoken: AtXwOU4HakeAhVPX5ymMG5NerRCINGvg'; 149 | $headers[] = 'X-Ig-App-Id: 936619743392459'; 150 | $headers[] = 'X-Ig-Www-Claim: 0'; 151 | curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); 152 | 153 | $hashtagData = curl_exec($ch); 154 | if (curl_errno($ch)) { 155 | echo "Error: Hash tag value is not available for " . $hashtag; 156 | echo 'Error:' . 
curl_error($ch); 157 | exit; 158 | } 159 | curl_close($ch); 160 | 161 | return $hashtagData; 162 | } 163 | 164 | public function getTagLikes($hashtag){ 165 | $instaHashtag = $this->getHashTageData($hashtag); 166 | $json_output = json_decode($instaHashtag,true); 167 | $likes = $json_output['graphql']['hashtag']['edge_hashtag_to_media']['count']; 168 | return $likes; 169 | } 170 | 171 | public function getTagData($hashtag){ 172 | $instaHashtag = $this->getHashTageData($hashtag); 173 | echo $instaHashtag; 174 | $json_output = json_decode($instaHashtag,true); 175 | $json_output = $json_output['graphql']['hashtag']['edge_hashtag_to_media']['edges']; 176 | $count = count($json_output); 177 | $hashtag_data = Array(); 178 | for($i=0;$i<$count;$i++){ 179 | error_reporting(~E_NOTICE); 180 | $txt = $json_output[$i]['node']['edge_media_to_caption']['edges']['0']['node']['text']; 181 | $post_img = $json_output[$i]['node']['display_url']; 182 | $hashtag_time = $json_output[$i]['node']['taken_at_timestamp']; 183 | $date = new DateTime("@$hashtag_time"); 184 | $hashtag_data[$i]['hashtag_img'] = $post_img; 185 | $hashtag_data[$i]['hashtag_txt'] = $txt; 186 | $hashtag_data[$i]['hashtag_time'] = $date->format('Y-m-d H:i:s'); 187 | } 188 | return Array('data'=>$hashtag_data,'count'=>$count); 189 | } 190 | 191 | } 192 | --------------------------------------------------------------------------------