├── .gitignore ├── LICENSE ├── README.md ├── config_sample.php └── vision.php /.gitignore: -------------------------------------------------------------------------------- 1 | config.php 2 | 3 | cache/ 4 | 5 | data/ 6 | 7 | outputs/ 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # memespector 2 | 3 | A simple script for using Google's Vision API. 
Takes a comma- or tab-separated file containing a column with image URLs as input, sends the images to the Vision API and puts the detected annotations back into the list. 4 | 5 | ## Installation 6 | 7 | Follow these steps to install the script on a PHP-equipped machine: 8 | 9 | 1. Download, unzip, and place the script files in some directory. 10 | 2. In the same directory, create three folders (“cache”, “data”, “outputs”) and make sure they can be written to (e.g. via chmod on Unix-like systems). 11 | 3. Go to apis.google.com and get an API key for Google’s Vision API. 12 | 4. Rename the config\_sample.php file to config.php and make the following edits: 13 | * insert the file name containing your image URLs (local or online) into the value of the $inputFile variable; 14 | * specify the name of the column containing the URLs in the $imagesColumn variable; 15 | * specify the column delimiter in the $csvDelimiter variable (“\t” for tab-separated files or “,” for comma-separated files); 16 | * put your API key into the $apiKey variable (PHP variable names are case-sensitive, so keep the spelling exactly as shown). 17 | 18 | ## Execution 19 | 20 | Run the script in a terminal by typing “php vision.php”. The script should start counting up to the number of images and store the answers from the Vision API in the cache folder. If the script is interrupted, it will retrieve the data from these files instead of hitting the API again. When finished, the script will write a new file into the outputs directory with the API results added as new columns. 21 | 22 | ## Credits 23 | 24 | Written by Bernhard Rieder, University of Amsterdam, https://github.com/bernorieder/, http://thepoliticsofsystems.net 25 | 26 | This is as-is software; no support is provided. 
27 | -------------------------------------------------------------------------------- /config_sample.php: -------------------------------------------------------------------------------- 1 | FALSE, 47 | "LABEL_DETECTION" => FALSE, 48 | "TEXT_DETECTION" => FALSE, 49 | "WEB_DETECTION" => FALSE, 50 | "FACE_DETECTION" => FALSE 51 | //TO BE FUTURELY IMPLEMENTED 52 | // "CROP_HINTS" 53 | // "IMAGE_PROPERTIES" 54 | // "LANDMARK_DETECTION" 55 | // "LOGO_DETECTION" 56 | ); 57 | 58 | //Limit maximum number of results per aspect 59 | $maxResults = 10; 60 | 61 | // !! CAREFUL !! Your Google Vision API key 62 | $apiKey = "YOUR_API_KEY_HERE"; 63 | 64 | // --------------------------------------- 65 | 66 | // ** INTERNAL SETTINGS (Probably no need to change) ** 67 | 68 | // Folders the script needs, create in the same directory and make sure they can be written to 69 | $dataDir = getcwd() . "/data/"; 70 | $jsonDir = getcwd() . "/cache/"; 71 | $outputsDir = getcwd() . "/outputs/" . $projectName . "/"; 72 | $imgDir = $outputsDir . "IMG/"; 73 | $jsoncopyDir= $outputsDir . "cache_copy" . "/"; 74 | $inputImgDir= $dataDir . "IMG/"; 75 | 76 | if(!file_exists($outputsDir)) { 77 | mkdir($outputsDir); 78 | } 79 | 80 | if($saveImageCopy && !file_exists($imgDir)) { 81 | mkdir($imgDir); 82 | } 83 | 84 | if(!file_exists($jsoncopyDir)) { 85 | mkdir($jsoncopyDir); 86 | } 87 | 88 | 89 | ignore_user_abort(false); 90 | set_time_limit(3600*5); 91 | ini_set("memory_limit","100M"); 92 | ini_set("error_reporting",1); 93 | 94 | ?> 95 | -------------------------------------------------------------------------------- /vision.php: -------------------------------------------------------------------------------- 1 | $status) { 25 | if($status) { 26 | echo " ." . $module . "\n"; 27 | } 28 | } 29 | echo "\n• • • • •\n"; 30 | 31 | echo "Project name:\t" . $projectName . "\n"; 32 | echo "Input file:\t" . $inputFile . "\n"; 33 | 34 | $numImages = count($images); 35 | echo "Images: \t" . count($images) . 
"\n"; 36 | 37 | if($limit > 0 && $limit <= $numImages) { 38 | $numImages = $limit; 39 | echo "Subset: " . $numImages . "\n• • • • •\n\n"; 40 | } 41 | 42 | // For each image in the dataset 43 | 44 | for($i = 0; $i < $numImages; $i++) { 45 | 46 | // Check if URL field contains something 47 | if (strlen($images[$i][$imagesColumn]) == 0) { 48 | echo ($i + 1) . "\n**ERROR**\nThis row does not seem to have an image URL. Did you configure the column name and delimiter right (see config.php)? Hint: don't use Excel.\n"; 49 | continue; 50 | } 51 | 52 | // Generate hash from URL 53 | $imageID = sha1($images[$i][$imagesColumn]); 54 | 55 | if(!$imagesRemote && !$absolutePath) { 56 | $imagePath = $inputImgDir . $images[$i][$imagesColumn]; 57 | } 58 | else { 59 | $imagePath = $images[$i][$imagesColumn]; 60 | } 61 | 62 | echo "Image " . ($i + 1) . " of " . $numImages . "\n"; 63 | echo "Path: " . $imagePath . "\n"; 64 | echo "Hash: " . $imageID . "\n"; 65 | 66 | // Specific tweaks for Facebook data. Extraction of file extension. 67 | if (array_key_exists("created_time_unix", $images[$i])) { 68 | /* This is facebook specific */ 69 | $images[$i]["created_time"] = date("Y-m-d H:i:s", $images[$i]["created_time_unix"]); 70 | 71 | preg_match_all("/.+\/(.+?)\?/",$imagePath,$out); 72 | $images[$i]["original_filename"] = $out[1][0]; 73 | $ext = pathinfo($images[$i]["original_filename"], PATHINFO_EXTENSION); 74 | } 75 | else { 76 | $ext = pathinfo($imagePath, PATHINFO_EXTENSION); 77 | } 78 | 79 | // Add hash and extension to CSV 80 | $images[$i]["image_id"] = $imageID; 81 | $images[$i]["file_ext"] = $ext; 82 | $images[$i]["copy_filename"] = $imageID . "." . $ext; 83 | 84 | // Make copy of image if set to do so 85 | if($saveImageCopy){ 86 | $localFile = $imgDir . $imageID . "." . $ext; 87 | echo "Copy path: " . $localFile . 
"\n"; 88 | if(!file_exists($localFile)){ 89 | echo "\tCopying image..."; 90 | copy($imagePath, $localFile); 91 | echo "done.\n"; 92 | } 93 | else { 94 | echo "\tCopy already existed \n"; 95 | } 96 | } 97 | 98 | // Process image (request to API) 99 | if ($forceBase64 && $saveImageCopy) { 100 | $info = processImage($localFile, $imageID); 101 | } 102 | else { 103 | $info = processImage($imagePath, $imageID); 104 | } 105 | 106 | // Catch error (specifically: image not retrievable by the API) 107 | $error= catchError($info); 108 | 109 | // Parse API json response and add it to the processed CSV 110 | foreach ($moduleActivation as $module => $status) { 111 | if(!$status){ 112 | switch ($module) { 113 | case 'LABEL_DETECTION': 114 | $images[$i]["gv_labels"] = "UNDETECTED"; 115 | break; 116 | case 'TEXT_DETECTION': 117 | $images[$i]["gv_text"] = "UNDETECTED"; 118 | break; 119 | case 'SAFE_SEARCH_DETECTION': 120 | $images[$i]["gv_ss_adult"] = "UNDETECTED"; 121 | $images[$i]["gv_ss_spoof"] = "UNDETECTED"; 122 | $images[$i]["gv_ss_medical"] = "UNDETECTED"; 123 | $images[$i]["gv_ss_violence"] = "UNDETECTED"; 124 | break; 125 | case 'WEB_DETECTION': 126 | $images[$i]["gv_web_entities"] = "UNDETECTED"; 127 | $images[$i]["gv_web_full_matching_images"] = "UNDETECTED"; 128 | $images[$i]["gv_web_partial_matching_images"] = "UNDETECTED"; 129 | $images[$i]["gv_web_pages_matching_images"] = "UNDETECTED"; 130 | $images[$i]["gv_web_visually_similar_images"] = "UNDETECTED"; 131 | break; 132 | case 'FACE_DETECTION': 133 | $images[$i]["gv_face_joy"] = "UNDETECTED"; 134 | $images[$i]["gv_face_sorrow"] = "UNDETECTED"; 135 | $images[$i]["gv_face_anger"] = "UNDETECTED"; 136 | $images[$i]["gv_face_surprise"] = "UNDETECTED"; 137 | break; 138 | } 139 | } 140 | else { 141 | switch ($module) { 142 | case 'LABEL_DETECTION': 143 | $labels = array(); 144 | foreach ($info->responses[0]->labelAnnotations as $annotation) { 145 | $labels[] = $annotation->description . "(" . $annotation->score . 
")"; 146 | } 147 | $images[$i]["gv_labels"] = implode(",", $labels); 148 | break; 149 | case 'TEXT_DETECTION': 150 | $images[$i]["gv_text"] = clean($info->responses[0]->textAnnotations[0]->description); 151 | break; 152 | case 'SAFE_SEARCH_DETECTION': 153 | $images[$i]["gv_ss_adult"] = $info->responses[0]->safeSearchAnnotation->adult; 154 | $images[$i]["gv_ss_spoof"] = $info->responses[0]->safeSearchAnnotation->spoof; 155 | $images[$i]["gv_ss_medical"] = $info->responses[0]->safeSearchAnnotation->medical; 156 | $images[$i]["gv_ss_violence"] = $info->responses[0]->safeSearchAnnotation->violence; 157 | break; 158 | case 'WEB_DETECTION': 159 | $entities = array(); 160 | foreach ($info->responses[0]->webDetection->webEntities as $annotation) { 161 | $entities[] = $annotation->description . "(" . $annotation->score . ")"; 162 | } 163 | $images[$i]["gv_web_entities"] = implode(",", $entities); 164 | 165 | $branches = array( 'fullMatchingImages' => 'gv_web_full_matching_images', 166 | 'partialMatchingImages' => 'gv_web_partial_matching_images', 167 | 'pagesWithMatchingImages' => 'gv_web_pages_with_matching_images', 168 | 'visuallySimilarImages' => 'gv_web_visually_similar_images' 169 | ); 170 | 171 | foreach ($branches as $branch => $csvfield) { 172 | $urls = array(); 173 | foreach ($info->responses[0]->webDetection->$branch as $annotation) { 174 | $urls[] = str_replace(",", "%2C", $annotation->url); 175 | } 176 | $images[$i][$csvfield] = implode(",", $urls); 177 | } 178 | break; 179 | case 'FACE_DETECTION': 180 | $faces = array(); 181 | $joyHigh = "UNDETECTED"; 182 | $sorrowHigh = "UNDETECTED"; 183 | $angerHigh = "UNDETECTED"; 184 | $surpriseHigh = "UNDETECTED"; 185 | foreach($info->responses[0]->faceAnnotations as $annotation) { 186 | $joyHigh = likelihoodCompare($joyHigh, $annotation->joyLikelihood); 187 | $sorrowHigh = likelihoodCompare($sorrowHigh, $annotation->sorrowLikelihood); 188 | $angerHigh = likelihoodCompare($angerHigh, $annotation->angerLikelihood); 189 | 
$surpriseHigh = likelihoodCompare($surpriseHigh, $annotation->surpriseLikelihood);
} // end foreach over faceAnnotations
$images[$i]["gv_face_joy"] = $joyHigh;
$images[$i]["gv_face_sorrow"] = $sorrowHigh;
$images[$i]["gv_face_anger"] = $angerHigh;
$images[$i]["gv_face_surprise"] = $surpriseHigh;
break;
} // end switch over enabled modules
} // end else (module enabled)
} // end foreach over $moduleActivation
echo "\n";
fputcsv($fp,$images[$i],$csvDelimiter,"\"","\\");
$images[$i] = "";
} // end for over images

/**
 * Returns the decoded Vision API response for one image, using the cache when possible.
 *
 * The raw JSON is stored as "<hash>.json" in $jsonDir (the cache) and mirrored
 * into $jsoncopyDir. When a cache file already exists the API is not called.
 *
 * @param string $imageUrl  Image location handed on to getAnnotation().
 * @param string $imageHash Identifier used as the cache file name.
 * @return mixed Result of json_decode() on the response, or null when the
 *               request failed and nothing could be fetched.
 */
function processImage($imageUrl, $imageHash) {
    global $jsonDir,$jsoncopyDir;

    $jsonfn = $jsonDir . $imageHash . ".json";
    $jsoncopy = $jsoncopyDir . $imageHash . ".json";

    // Check if file has been processed and use cached content if available
    if (file_exists($jsonfn)) {
        echo "\t**Using cached content (remove all files in the cache folder if you see this message and the tool is not working yet)**\n";
        $jsonResponse = file_get_contents($jsonfn);
    }
    else {
        $jsonResponse = getAnnotation($imageUrl, $imageHash);
        if ($jsonResponse === false) {
            // Do not cache a failed request: an empty cache file would be
            // picked up on every later run and hide the image from the API.
            echo "\t**API request failed; nothing cached for this image**\n";
            return null;
        }
        file_put_contents($jsonfn, $jsonResponse);
    }

    file_put_contents($jsoncopy, $jsonResponse);
    return json_decode($jsonResponse);
}

/**
 * Sends one image to the Vision API and returns the raw JSON answer.
 *
 * Remote images are passed by URL unless $forceBase64 is set; in every other
 * case the image bytes are read and sent inline as base64.
 *
 * @param string $imageUrl URL or file path of the image. The caller has
 *                         already resolved local paths, so it is used as is.
 * @param string $imageID  Image identifier (currently unused, kept for callers).
 * @return string|false Raw response body, or false when the image could not
 *                      be read or the HTTP request failed.
 */
function getAnnotation($imageUrl, $imageID) {
    global $apiKey, $imagesRemote, $forceBase64;

    // Create json request according to settings
    if ($imagesRemote && !$forceBase64) {
        $jsonRequest = jsonRequestRemote($imageUrl);
    }
    else {
        // The former "$inputImgDir . $imageUrl" step was removed: both variables
        // it read were undefined in this scope, and the caller passes a path
        // that is already complete, so prefixing again would break it.
        echo "\tEncoding base64...";
        $imageData = file_get_contents($imageUrl);
        if ($imageData === false) {
            echo "failed (could not read " . $imageUrl . ").\n";
            return false;
        }
        $jsonRequest = jsonRequestBase64(base64_encode($imageData));
        echo "done.\n";
    }

    // Submit request to API
    $cvurl = 'https://vision.googleapis.com/v1/images:annotate?key=' . urlencode($apiKey);
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $cvurl);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_HTTPHEADER, array('Content-type: application/json'));
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $jsonRequest);
    echo "\tMaking API request...";
    $jsonResponse = curl_exec($curl);
    if ($jsonResponse === false) {
        // Transport-level failure (DNS, TLS, timeout...): report it instead of
        // pretending the request succeeded.
        echo "failed (" . curl_error($curl) . ").\n";
    }
    else {
        echo "done. \n";
    }
    curl_close($curl);
    return $jsonResponse;
}

/**
 * Lists the enabled detection modules as Vision API feature entries.
 *
 * @return array One array("type" => ..., "maxResults" => ...) per module whose
 *               flag in $moduleActivation is truthy, in configuration order.
 */
function visionFeatureList() {
    global $moduleActivation, $maxResults;

    $features = array();
    foreach ($moduleActivation as $module => $status) {
        if ($status) {
            $features[] = array("type" => $module, "maxResults" => (int) $maxResults);
        }
    }
    return $features;
}

/**
 * Builds the JSON request body for an image referenced by URL.
 *
 * The body is produced with json_encode() so that quotes or backslashes in the
 * URL cannot break the request.
 *
 * @param string $imageUrl Publicly reachable image URL.
 * @return string|false JSON request body (false if json_encode() fails).
 */
function jsonRequestRemote($imageUrl) {
    $request = array(
        "requests" => array(
            array(
                "image" => array("source" => array("imageUri" => $imageUrl)),
                "features" => visionFeatureList(),
            ),
        ),
    );
    return json_encode($request, JSON_UNESCAPED_SLASHES);
}

/**
 * Builds the JSON request body for an image sent inline.
 *
 * @param string $base64 Base64-encoded image bytes.
 * @return string|false JSON request body (false if json_encode() fails).
 */
function jsonRequestBase64($base64) {
    $request = array(
        "requests" => array(
            array(
                "image" => array("content" => $base64),
                "features" => visionFeatureList(),
            ),
        ),
    );
    return json_encode($request, JSON_UNESCAPED_SLASHES);
}

/**
 * Returns the enabled features as a comma-separated list of JSON objects,
 * ready to be placed between "[" and "]".
 *
 * Entries are joined with implode(), so no trailing comma is produced when the
 * last configured module is disabled.
 *
 * @return string JSON fragment; empty string when no module is enabled.
 */
function jsonRequestFeatures() {
    return implode(",\n", array_map('json_encode', visionFeatureList()));
}

/**
 * Reads a delimited text file into a list of rows keyed by the header names.
 *
 * The first non-blank row is the header; a UTF-8 BOM at its start is removed.
 * Blank lines are skipped. Rows with fewer fields than the header are padded
 * with empty strings and extra fields are dropped, so a single malformed line
 * cannot abort the import.
 *
 * @param string $filename  Path of the file to read.
 * @param string $delimiter Field delimiter.
 * @return array|false List of associative rows, or false when the file does
 *                     not exist or is not readable.
 */
function getCSV($filename,$delimiter = ",") {

    if (!file_exists($filename) || !is_readable($filename)) { return false; }

    $header = null;
    $columns = 0;
    $data = array();
    if (($handle = fopen($filename,"r")) !== false) {
        while (($row = fgetcsv($handle, 0, $delimiter, "\"", "\\")) !== false) {
            // fgetcsv() reports a blank line as an array holding a single null
            if ($row === array(null)) { continue; }

            if ($header === null) {
                $row[0] = preg_replace("/^\xEF\xBB\xBF/", "", $row[0]); // delete UTF-8 BOM (it's put into the file again at write)
                $header = $row;
                $columns = count($header);
                continue;
            }

            if (count($row) !== $columns) {
                // array_combine() requires both arrays to have the same length
                $row = array_slice(array_pad($row, $columns, ""), 0, $columns);
            }
            $data[] = array_combine($header, $row);
        }
        fclose($handle);
    }
    return $data;
}

/**
 * Replaces every newline, tab and carriage return with a single space.
 *
 * @param string $text Text to flatten.
 * @return string Text without line breaks or tabs.
 */
function clean($text) {

    $text = preg_replace("/[\n\t\r]/"," ", $text);

    return $text;
}

/**
 * Returns the stronger of two likelihood labels.
 *
 * Order, weakest first: UNDETECTED, UNKNOWN, VERY_UNLIKELY, UNLIKELY,
 * POSSIBLE, LIKELY, VERY_LIKELY. $two wins only when it ranks strictly higher
 * than $one; unrecognised labels never replace $one.
 *
 * @param string $one Current highest label.
 * @param string $two Candidate label.
 * @return string The label to keep.
 */
function likelihoodCompare($one, $two) {
    static $rank = array(
        "UNDETECTED"    => 0,
        "UNKNOWN"       => 1,
        "VERY_UNLIKELY" => 2,
        "UNLIKELY"      => 3,
        "POSSIBLE"      => 4,
        "LIKELY"        => 5,
        "VERY_LIKELY"   => 6,
    );

    if (!is_string($one) || !is_string($two) || !isset($rank[$one], $rank[$two])) {
        return $one;
    }
    return ($rank[$two] > $rank[$one]) ? $two : $one;
}

/**
 * Stops the script when the API reports error code 7 for the image.
 *
 * Only the "error" entry of the first response is inspected; responses
 * without one (including a null response) are ignored.
 *
 * @param mixed $jsonResponse Decoded API response as returned by processImage().
 * @return void
 */
function catchError($jsonResponse) {
    if (!isset($jsonResponse->responses[0]->error->code)) {
        return;
    }

    if ($jsonResponse->responses[0]->error->code == 7) {
        echo "\n **PROCESSING ERROR** \nGoogle Vision API is unable to access the remote image. Try setting 'forceBase64' in configuration file to 'TRUE'. Script will be interrupted. \n\n";
        exit();
    }
}