├── Checker.php ├── LICENSE ├── NudeDetector.php ├── README.md ├── bounding_polygon.php ├── composer.json ├── fake_nude.jpg ├── fake_nude.php ├── region_colors.php └── skin_maps.php /Checker.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | reasons = TRUE; 10 | $ycbcr = new NudeDetector(null, 'YCbCr'); 11 | $ycbcr->reasons = TRUE; 12 | foreach ($argv as $idx => $arg) { 13 | 14 | if ($idx == 0) continue; 15 | 16 | $hsv->set_file_name($arg); 17 | $y = $hsv->is_nude(); 18 | echo $arg . "\tHSV\t"; 19 | if ($y) echo "NUDITY!\n"; 20 | else echo "Not nude.\n"; 21 | 22 | $ycbcr->set_file_name($arg); 23 | $y = $ycbcr->is_nude(); 24 | echo $arg . "\tYCbCr\t"; 25 | if ($y) echo "NUDITY!\n"; 26 | else echo "Not nude.\n"; 27 | } 28 | 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Bruce Ediger 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# Point the detector at a new image file and load it, or unload the
# current image when $file_name is empty.
#
# The previously loaded image and skin map are released, and every value
# derived from them (pixel counts, region labels, region populations,
# sorted populations, hull and hull statistics) is discarded. Without
# that reset a reused detector would skip recomputation in is_nude(),
# which only rebuilds those values when they are NULL/0, and would judge
# the new file with the old file's regions.
#
# On return $this->width and $this->height hold the image dimensions,
# or -1 each when no image is loaded.
public function set_file_name($file_name) {
    $this->file_name = $file_name;

    # Release GD resources belonging to the previous file, whatever the
    # new file name is.
    if ($this->image)
        imagedestroy($this->image);
    $this->image = NULL;

    if ($this->skin_map)
        imagedestroy($this->skin_map);
    $this->skin_map = NULL;

    # Forget everything computed from the previous image.
    $this->skin_pixel_count = 0;
    $this->background_pixel_count = 0;
    $this->region_numbers = NULL;
    $this->region_population = NULL;
    $this->sorted_region_populations = NULL;
    $this->region_count = 0;
    $this->hull = NULL;
    $this->hull_area = 0;
    $this->skin_pixels_hull_count = 0;

    if ($this->file_name)
        $this->create_image();

    # create_image() leaves a falsy value behind when loading fails.
    if ($this->image) {
        $this->width = imagesx($this->image);
        $this->height = imagesy($this->image);
    } else {
        $this->height = -1;
        $this->width = -1;
    }
}
# RGB based skin test, instead of HSV or YCbCr.
# Skin detection criteria from "Image Filter v 1.0":
# http://www.phpclasses.org/package/3269-PHP-Determine-whether-an-image-may-contain-nudity.html
#
# $R, $G, $B are the 0-255 channel values of one pixel.
# Returns TRUE when all three channels lie inside their inclusive
# skin-tone window, FALSE otherwise.
function alsharif_skin_detector($R, $G, $B) {
    # Each channel has its own [low, high] window. The blue window's
    # upper bound must be tested against $B, not $G.
    return $R >= 0x79 && $R <= 0xFE
        && $G >= 0x3B && $G <= 0xC5
        && $B >= 0x24 && $B <= 0xBF;
}
# Skin test in HSV space. Converts the RGB triple with calculate_HSV()
# and answers TRUE only when hue, saturation and value each fall
# strictly inside their window:
#   0 < H < 0.25,  0.15 < S < 0.9,  0.20 < V < 0.95
function HSV_skin_detector($r, $g, $b) {

    $hsv = $this->calculate_HSV($r, $g, $b);
    $hue        = $hsv[0];
    $saturation = $hsv[1];
    $value      = $hsv[2];

    # Reject as soon as one component is outside its window.
    if (!($hue > 0. && $hue < 0.25))
        return FALSE;

    if (!($saturation > 0.15 && $saturation < 0.9))
        return FALSE;

    if (!($value > 0.20 && $value < 0.95))
        return FALSE;

    return TRUE;
}
$largest_region_portion*100., 212 | $next_region_portion*100., 213 | $third_region_portion*100. 214 | ); 215 | return FALSE; 216 | } 217 | 218 | # Criteria (c) 219 | if ($largest_region_portion < 0.45) { 220 | if ($this->reasons) 221 | printf("Largest skin-colored region is %.2f%% of total image pixels, < 45%%\n", $largest_region_portion*100); 222 | return FALSE; 223 | } 224 | 225 | # Criteria (d) 226 | if ($total_skin_portion < 0.30) { 227 | $in_polygon_portion = (float)$this->skin_pixels_hull_count/(float)$this->hull_area; 228 | if ($in_polygon_portion < 0.55) { 229 | if ($this->reasons) 230 | printf("Skin pixels %.0f%% of total, < 30%%, in-bounding polygon skin pixels %.0f%%, < 55%% of bounding polygon area\n", 231 | $total_skin_portion*100, $in_polygon_portion*100); 232 | return FALSE; 233 | } 234 | } 235 | 236 | # Criteria (e) 237 | # "If the number of skin regions is more than 60 and the average 238 | # intensity within the polygon is less than 0.25, the image is not nude." 239 | # WTF does "intensity" mean? 240 | 241 | # Criteria (f) 242 | # "Otherwise, the image is nude." 
# Decide whether the current image counts as "nude", computing each
# intermediate result (skin map, regions, populations, hull) only when
# it is first needed.
#
# Returns TRUE for "nude", FALSE otherwise. When $this->reasons is set,
# the criterion that rejected the image is printed.
#
# Images with fewer than three skin regions are handled by treating the
# missing regions as having zero population, and a bounding polygon of
# zero area is treated as containing no skin.
function is_nude() {

    if ($this->skin_map == NULL)
        $this->map_skin_pixels();

    $total_pixel_count = (float)($this->skin_pixel_count + $this->background_pixel_count);

    # No image loaded, or an empty one.
    if ($total_pixel_count == 0)
        return FALSE;

    $total_skin_portion = (float)$this->skin_pixel_count/$total_pixel_count;

    # Criteria (a)
    if ($total_skin_portion < 0.15) {
        if ($this->reasons)
            printf("Total skin pixes are %.2f%% of total pixel count, < 15%%\n", $total_skin_portion * 100.);
        return FALSE;
    }

    if ($this->region_numbers == NULL)
        $this->determine_regions();

    if ($this->region_population == NULL)
        $this->count_region_population();

    if ($this->sorted_region_populations == NULL)
        $this->sort_regions_by_population();

    # Criteria (b)
    # Share of all skin pixels held by each of the 3 largest regions.
    # $this->skin_pixel_count is non-zero here, because criteria (a)
    # passed. A region that does not exist contributes 0.
    $skin_pixel_count = (float)$this->skin_pixel_count;
    $portions = array(0.0, 0.0, 0.0);
    for ($i = 0; $i < 3; ++$i) {
        if (isset($this->sorted_region_populations[$i][1]))
            $portions[$i] = (float)$this->sorted_region_populations[$i][1]/$skin_pixel_count;
    }
    $largest_region_portion = $portions[0];
    $next_region_portion    = $portions[1];
    $third_region_portion   = $portions[2];

    if ($largest_region_portion < 0.35 && $next_region_portion < 0.30 && $third_region_portion < 0.30) {
        if ($this->reasons)
            printf("3 largest skin regions are %.0f%%, %.0f%% and %.0f%%, less than 35%%, 30%% and 30%%, respectively.\n",
                $largest_region_portion*100.,
                $next_region_portion*100.,
                $third_region_portion*100.
            );
        return FALSE;
    }

    # Criteria (c)
    if ($largest_region_portion < 0.45) {
        if ($this->reasons)
            printf("Largest skin-colored region is %.2f%% of total image pixels, < 45%%\n", $largest_region_portion*100);
        return FALSE;
    }

    # Criteria (d)
    if ($total_skin_portion < 0.30) {
        if ($this->hull == NULL)
            $this->find_bounding_polygon();

        if ($this->hull_area == 0)
            $this->calculate_hull_area();

        if ($this->skin_pixels_hull_count == 0)
            $this->count_bounding_polygon();

        # A degenerate polygon (a point or a line) has zero area.
        # Treat it as holding no skin instead of dividing by zero.
        $in_polygon_portion = 0.0;
        if ($this->hull_area > 0)
            $in_polygon_portion = (float)$this->skin_pixels_hull_count/(float)$this->hull_area;

        if ($in_polygon_portion < 0.55) {
            if ($this->reasons)
                printf("Skin pixels %.0f%% of total, < 30%%, in-bounding polygon skin pixels %.0f%%, < 55%% of bounding polygon area\n",
                    $total_skin_portion*100, $in_polygon_portion*100);
            return FALSE;
        }
    }

    # Criteria (e)
    # "If the number of skin regions is more than 60 and the average
    # intensity within the polygon is less than 0.25, the image is not nude."
    # Not implemented: the paper does not say what "intensity" means.

    # Criteria (f)
    # "Otherwise, the image is nude."
    return TRUE;
}
# $this->hull[] contains the vertices of a convex hull, in order, with
# the first vertex repeated as the last entry.
# Stores the polygon's area in $this->hull_area (shoelace formula).
# Leaves $this->hull_area untouched when there is no hull.
function calculate_hull_area() {

    if ($this->hull == NULL || count($this->hull) < 1)
        return;

    // First and last entries are identical, so there are
    // count - 1 distinct vertices.
    $n = count($this->hull) - 1;

    // Sum the cross products of consecutive vertices. The modulo wraps
    // the last vertex back to the first, which closes the polygon
    // without reading the duplicated final entry. With $n == 0 the loop
    // does not run and the area is 0.
    $twice_area = 0;
    for ($i = 0; $i < $n; ++$i) {
        $j = ($i + 1) % $n;
        $twice_area += $this->hull[$i][0]*$this->hull[$j][1]
                     - $this->hull[$j][0]*$this->hull[$i][1];
    }

    // Sign depends on vertex winding, so take the absolute value.
    $this->hull_area = abs((float)$twice_area/2);
}
# Build $this->sorted_region_populations: a list of
# array(region number, population) pairs ordered from largest to
# smallest population.
#
# More than one region can have the same population, so "the 3 largest
# regions" of Ap-Apid's algorithm is ambiguous. Ties are kept in the
# order the regions appear in $this->region_population, so the result
# is deterministic.
#
# Region 0 is the non-skin background and never appears in the result.
# Does nothing when $this->region_population has not been filled in.
function sort_regions_by_population() {
    if ($this->region_population == NULL)
        return;

    # Collect (region number, population, original position) for every
    # skin region. The position is only used to break ties.
    $entries = array();
    $position = 0;
    foreach ($this->region_population as $regno => $population) {
        if ($regno != 0)
            $entries[] = array($regno, $population, $position);
        ++$position;
    }

    # Largest population first; equal populations keep their original
    # relative order. The explicit tie-break makes the order independent
    # of whether the PHP version's usort() is stable.
    usort($entries, function ($a, $b) {
        if ($a[1] != $b[1])
            return ($a[1] > $b[1]) ? -1 : 1;
        if ($a[2] == $b[2])
            return 0;
        return ($a[2] < $b[2]) ? -1 : 1;
    });

    # This is an instance var because it is used here and again when
    # deciding "nude/not nude" later.
    $this->sorted_region_populations = array();
    foreach ($entries as $entry)
        $this->sorted_region_populations[] = array($entry[0], $entry[1]);
}
# Compute $this->hull, the "bounding polygon" of the 3 largest skin
# regions.
#
# Ap-Apid doesn't define what a "bounding polygon" is, so this takes it
# to be the convex hull of the topmost, leftmost, rightmost and
# bottommost pixel of each of the 3 largest regions (12 points). If
# Ap-Apid means some kind of concave outline, the convex hull will
# over-count the skin pixels inside the polygon.
#
# On success $this->hull lists the hull vertices in order, starting at
# the point find_lower_left() selects and ending with that same point
# repeated. Does nothing when region populations are not available.
function find_bounding_polygon() {

    if ($this->sorted_region_populations == NULL)
        $this->sort_regions_by_population();

    if ($this->region_population == NULL)
        return;

    if ($this->sorted_region_populations == NULL)
        return;

    # Extreme pixel coordinates of each of the 3 largest regions.
    # Index 0 is the largest region, 1 the second largest, and so on.
    # The initial values are chosen so that the first pixel examined
    # replaces them.
    $top   = array_fill(0, 3, array(0, $this->height + 1));
    $left  = array_fill(0, 3, array($this->width + 1, 0));
    $right = array_fill(0, 3, array(0, 0));
    $bot   = array_fill(0, 3, array(0, 0));

    $this->find_extreme_coords($top, $left, $right, $bot);

    # Points that are either on or in the convex hull.
    $points = array();
    for ($n = 0; $n < 3; ++$n) {
        $points[] = $top[$n];
        $points[] = $left[$n];
        $points[] = $right[$n];
        $points[] = $bot[$n];
    }

    # "Gift Wrapping" method - only 12 possible points, so this
    # doesn't take much time.
    $lower_left_point = $this->find_lower_left($points);
    $this->hull = array($lower_left_point);

    # Unit vector of the edge just walked. The walk starts out heading
    # in the +Y direction.
    $vector = array(0, 1);
    $current_point = array($lower_left_point[0], $lower_left_point[1]);

    $point_count = 0;
    $number_of_points = count($points);

    do {
        $min_angle = 600.0;      # Larger than any angle acos() returns.
        $next_point = FALSE;
        $next_vector = $vector;

        foreach ($points as $candidate_point) {

            // The current point itself is among the candidates,
            // skip it (this also avoids a zero magnitude below).
            if ($candidate_point[0] == $current_point[0]
                && $candidate_point[1] == $current_point[1]) continue;

            $delta_x = $candidate_point[0] - $current_point[0];
            $delta_y = $candidate_point[1] - $current_point[1];
            $magnitude = sqrt($delta_x*$delta_x + $delta_y*$delta_y);

            $algebraic_dot_product = $delta_x * $vector[0]
                + $delta_y * $vector[1];

            // Rounding can push the cosine a hair outside [-1, 1],
            // where acos() returns NAN and the candidate would lose
            // every comparison. Clamp it back into range.
            $cosine_angle = max(-1.0, min(1.0, $algebraic_dot_product/$magnitude));
            $angle = acos($cosine_angle);

            // Smallest turn from the previous edge wins; on equal
            // angles the first candidate seen is kept.
            if ($angle < $min_angle) {
                $next_point = array($candidate_point[0], $candidate_point[1]);
                $min_angle = $angle;
                $next_vector = array($delta_x/$magnitude, $delta_y/$magnitude);
            }
        }

        // Every point coincides with the current one: there is no
        // polygon to walk, leave the hull as the single start point.
        if ($next_point === FALSE)
            break;

        $vector = $next_vector;
        $current_point = $next_point;

        $this->hull[] = $current_point;
        ++$point_count;

    // Stop on returning to the start point. The point count is a
    // safety net against walking forever.
    } while ($point_count <= $number_of_points
        && !($current_point[0] == $lower_left_point[0]
            && $current_point[1] == $lower_left_point[1])
    );
}
// Determine regions of skin colored pixels, where "region" is
// based on whether a pixel touches another pixel left/right/above/below
// it. See http://en.wikipedia.org/wiki/Connected-component_labeling
// This finds 4-connected (top, left, bottom, right) regions; no
// corner-to-corner touching is considered. Ap-Apid doesn't give
// details of what algorithm he used to choose "regions".
//
// Fills $this->region_numbers[$x][$y] with a region number per pixel:
// 0 for non-skin pixels, consecutive numbers starting at 1 for skin
// regions. Resets $this->region_population. Does nothing when there is
// no skin map.
//
// Computationally expensive.
function determine_regions() {
    if ($this->skin_map == NULL)
        return;

    $this->region_population = NULL;

    // $equiv_regions[label] is the region number that label currently
    // belongs to. Index 0 is reserved for "not a skin pixel".
    $equiv_regions = array(0);
    $this->region_numbers = array_fill(0, $this->width+1, array_fill(0, $this->height+1, 0));

    // For now, $this->region_numbers[$x][$y], if nonzero, is an index into
    // $equiv_regions[], and that value is the region number. That changes
    // at the end of this function.

    // Labels start at 1 so that a fresh label equals the index
    // $equiv_regions[] appends it at, and never collides with the
    // reserved 0.
    $region_sequence = 1;

    $black = imagecolorclosest($this->skin_map, 0, 0, 0);
    $black_pixel_count = 0;

    # Outer loop over Y-values, inner loop over X-values,
    # so that North and West pixels (relative to pixel at [$x,$y]) have
    # a label assigned to them already.
    for ($y = 0; $y < $this->height; ++$y) {
        for ($x = 0; $x < $this->width; ++$x) {
            if (imagecolorat($this->skin_map, $x, $y) == $black) {
                ++$black_pixel_count;

                $west_region_no = 0;  // A region number, not an index into $equiv_regions[]
                $north_region_no = 0; // Also a region number.

                # West pixel
                if ($x - 1 >= 0 && imagecolorat($this->skin_map, $x - 1, $y) == $black) {
                    // Pixel at ($x,$y) is in same region as pixel at ($x-1,$y)
                    if (($west_region_idx = $this->region_numbers[$x-1][$y]) > 0) {
                        $this->region_numbers[$x][$y] = $west_region_idx;
                        $west_region_no = $equiv_regions[$west_region_idx];
                    }
                }

                # North pixel
                if ($y - 1 >= 0 && imagecolorat($this->skin_map, $x, $y-1) == $black) {
                    // Pixel at ($x,$y) is in same region as pixel at ($x,$y-1)
                    if (($north_region_idx = $this->region_numbers[$x][$y-1]) > 0) {
                        $this->region_numbers[$x][$y] = $north_region_idx;
                        $north_region_no = $equiv_regions[$north_region_idx];
                    }
                }

                # Both pixels - current pixel connects previously
                # unconnected regions.
                if ($west_region_no != 0 && $north_region_no != 0 && $west_region_no != $north_region_no) {
                    // The lower region number survives the merge.
                    $connected_region_no = $north_region_no;
                    $other_region_no = $west_region_no;
                    if ($north_region_no > $west_region_no) {
                        $connected_region_no = $west_region_no;
                        $other_region_no = $north_region_no;
                    }
                    $equiv_regions[$west_region_idx] = $connected_region_no;
                    $equiv_regions[$north_region_idx] = $connected_region_no;

                    // This is why $equiv_regions exists: to connect
                    // regions we only iterate over the labels, not over
                    // every pixel of $this->region_numbers.
                    $equiv_count = count($equiv_regions);
                    for ($i = 1; $i < $equiv_count; ++$i)
                        if ($equiv_regions[$i] == $other_region_no)
                            $equiv_regions[$i] = $connected_region_no;

                } else if (($west_region_no == 0) && ($north_region_no == 0)) {
                    # New region
                    $this->region_numbers[$x][$y] = $region_sequence;
                    $equiv_regions[] = $region_sequence++;
                } // else - Nothing more to do.
            }
        }
    }

    # XXX - should check if $black_pixel_count and $this->skin_pixel_count equate.

    # Make a map, called $renumber, so as to get consecutive
    # numbers, starting at 1, for the skin-colored regions.
    $renumber = array(0 => 0);
    $idx = 1;
    $max = count($equiv_regions);
    for ($i = 0; $i < $max; ++$i) {
        $regno = $equiv_regions[$i];
        if (!array_key_exists($regno, $renumber))
            $renumber[$regno] = $idx++;
    }

    # Fix up $this->region_numbers to hold actual region numbers, not
    # indexes into $equiv_regions[]. Note the use of $renumber.
    for ($x = 0; $x < $this->width; ++$x) {
        $a = $this->region_numbers[$x];
        for ($y = 0; $y < $this->height; ++$y) {
            $this->region_numbers[$x][$y]
                = $renumber[$equiv_regions[$a[$y]]];
        }
    }

}
# Build a palette image in which every skin region is drawn in its own
# color: non-skin pixels white, the largest region red, the second
# green, the third blue, and further regions in random colors.
#
# Returns the new GD image, or NULL when the skin map, the region
# numbers or the region count are not available. The caller owns the
# returned image.
function create_colored_regions() {
    if ($this->skin_map == NULL) return NULL;
    if ($this->region_numbers == NULL) return NULL;
    if ($this->region_count == 0) return NULL;

    # The coloring goes by population rank, so make sure the ranking
    # exists.
    if ($this->sorted_region_populations == NULL)
        $this->sort_regions_by_population();

    $sorted = $this->sorted_region_populations;
    if ($sorted == NULL)
        $sorted = array();

    $region_map = imagecreate($this->width, $this->height);

    # Region number => palette index.
    $colorat = array();

    # Non-skin pixels are region 0, color them white.
    $colorat[0] = imagecolorallocate($region_map, 255,255,255);

    # Fixed colors for the 3 largest regions, in rank order.
    $fixed_colors = array(
        array(255, 0, 0),
        array(0, 255, 0),
        array(0, 0, 255)
    );

    # A palette image holds 256 colors and white already uses one, so at
    # most 255 regions get a color of their own. The rest fall back to
    # white when the pixels are drawn.
    $colored_regions = min(count($sorted), 255);

    for ($i = 0; $i < $colored_regions; ++$i) {
        if ($i < 3)
            $rgb = $fixed_colors[$i];
        else
            $rgb = array(rand(0,250), rand(0,250), rand(0,250));

        $colorat[$sorted[$i][0]]
            = imagecolorallocate($region_map, $rgb[0], $rgb[1], $rgb[2]);
    }

    for ($y = 0; $y < $this->height; ++$y) {
        for ($x = 0; $x < $this->width; ++$x) {
            $region_no = $this->region_numbers[$x][$y];
            imagesetpixel($region_map, $x, $y, isset($colorat[$region_no])? $colorat[$region_no]: $colorat[0]);
        }
    }

    return $region_map;
}
32 | 33 | The paper doesn't define "bounding polygon"; I took it to be the convex hull 34 | that surrounds the twelve points (topmost, leftmost, lowermost and rightmost) 35 | of the 3 biggest skin-colored regions. But maybe Ap-Apid means something else, 36 | an irregular, possibly concave polygon perhaps. 37 | 38 | ### Example Use 39 | 40 | #!/usr/bin/env php 41 | <?php 42 | require 'NudeDetector.php'; 43 | 44 | $detector = new NudeDetector(null, 'HSV'); 45 | foreach ($argv as $idx => $filename) { 46 | if ($idx == 0) continue; 47 | $detector->set_file_name($filename); 48 | if ($detector->is_nude()) { 49 | // Deal with $filename as ritually unclean. 50 | } else { 51 | // $filename probably doesn't contain nudity 52 | } 53 | } 54 | 55 | Notice that you can use the methods `set_file_name()` followed by `is_nude()` 56 | without creating a new instance of `NudeDetector`. 57 | 58 | ### Example Programs 59 | 60 | The example programs illustrate features of class `NudeDetector` 61 | that are beyond a simple example. They also allow you to check on 62 | intermediate steps in the nudity determination. 63 | 64 | > `Checker.php imagefile [imagefile ...]` 65 | 66 | Check each image file for nudity, using both HSV and YCbCr skin color models. 67 | Print out reasons for why an image doesn't qualify as "containing nudity". 68 | 69 | > `skin_maps.php imagefile prefix` 70 | 71 | Creates two GIF files, one each for HSV and YCbCr skin color models. The GIF 72 | files are strictly black-and-white: every skin-colored pixel in the original 73 | image file is colored black in the GIF files, and all other pixels are colored white. 74 | The GIF files have the names `prefixHSV.gif` and `prefixYCbCr.gif`. 75 | 76 | Flicking between the two images with, for example, the `feh` image viewer, gives 77 | you some idea of differences in how the two skin-color models in 78 | NudeDetectorPHP decide on skin-colored pixels. 
79 | 80 | > `region_colors.php imagefile outputfile` 81 | 82 | Creates a GIF-format output file with each connected region of skin-colored pixels 83 | in the original image as a different color in the output image. 84 | 85 | > `bounding_polygon.php imagefile outputfile` 86 | 87 | Creates a GIF-format output file that has the "bounding polygon" of Ap-Apid's 88 | algorithm clipping the skin-colored pixels. 89 | 90 | ### Generate fake nudity 91 | > `fake_nude.php x y imagefile` 92 | 93 | Generates a JPEG-format imagefile `x` pixels wide and `y` pixels tall. This image 94 | does not contain any nudity. In fact the "skin colors" it generates are usually not 95 | even in the realm of the biological, but `Checker.php` will consistently label the 96 | image as containing nudity. Ha ha! 97 | 98 | ![Fake nude generated by fake_nude.php](https://raw.githubusercontent.com/bediger4000/NudeDetectorPHP/master/fake_nude.jpg) 99 | 100 | _Example Fake Nude_ 101 | 102 | ### How Does It Perform? 103 | 104 | In my personal evaluation, not very well. It almost always identifies portraits 105 | and head shots as "nudity". It's confused by natural colors: sand, wood, rock, 106 | soil, or even leaves. This is a flaw of skin color detection, but Ap-Apid 107 | does specify an HSV skin-color test. 108 | 109 | Overall, I can't even characterize it as "too prudish" (lots of false positives), 110 | or "too lecherous" (lots of false negatives). 111 | 112 | ### Other implementation 113 | 114 | The most widely known implementation of Rigan Ap-Apid's algorithm is 115 | [nude.js](https://github.com/pa7/nude.js). I didn't transliterate `nude.js` to 116 | PHP; I implemented the algorithm in Ap-Apid's paper from scratch. 
117 | -------------------------------------------------------------------------------- /bounding_polygon.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | calculate_everything(); 14 | 15 | $img = $detector->skin_map; 16 | 17 | imagegif($img, $argv[2]); 18 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bediger4000/nudedetectorphp", 3 | "description": "Detect images containing nudity some of the time", 4 | "require": { 5 | "php": "^5.3.3 || ^7.0" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /fake_nude.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bediger4000/NudeDetectorPHP/3260432e594f331f246eb470d91f204e65f038c5/fake_nude.jpg -------------------------------------------------------------------------------- /fake_nude.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | 95) && ($G>40 && $G <100) && ($B>20) && ((max($R,$G,$B) - min($R,$G,$B)) > 15) && (abs($R-$G)>15) && ($R > $G) && ($R > $B)); 60 | 61 | 62 | # Is it "skin colored" according to 63 | # http://www.phpclasses.org/package/3269-PHP-Determine-whether-an-image-may-contain-nudity.html 64 | # Which seems to be fairly widespread in PHP circles? 
65 | $alsharif_skin_colored = FALSE; 66 | if ($R >= 0x79 && $R <= 0xFE 67 | && $G >= 0x3B && $G <= 0xC5 68 | && $B >= 0x24 && $B <= 0xBF) 69 | $alsharif_skin_colored = TRUE; 70 | 71 | $is_skin_colored = $hsv_skin_colored && $alsharif_skin_colored; 72 | 73 | } while (!$is_skin_colored); 74 | 75 | return array($R, $G, $B); 76 | } 77 | # Convert Y, Cb, Cr (chroma centered on 0x80) to integer R, G, B, each clamped to 0-255. 78 | function RGBFromYCbCr($y, $cb, $cr) 79 | { 80 | $Y = (float) $y; 81 | $Cb = (float) $cb; 82 | $Cr = (float) $cr; 83 | 84 | $r = (int) ($Y + 1.40200 * ($Cr - 0x80)); 85 | $g = (int) ($Y - 0.34414 * ($Cb - 0x80) - 0.71414 * ($Cr - 0x80)); 86 | $b = (int) ($Y + 1.77200 * ($Cb - 0x80)); 87 | 88 | $r = max(0, min(255, $r)); 89 | $g = max(0, min(255, $g)); 90 | $b = max(0, min(255, $b)); 91 | 92 | return array($r, $g, $b); 93 | } 94 | # TRUE when the color's H, S and V (from calculate_HSV) all lie strictly inside the skin ranges tested below. 95 | function HSV_skin_detector($r, $g, $b) 96 | { 97 | list($H, $S, $V) = calculate_HSV($r, $g, $b); 98 | 99 | $r = FALSE; 100 | 101 | if ( $H > 0. && $H < 0.25 102 | && $S > 0.15 && $S < 0.9 103 | && $V > 0.20 && $V < 0.95) 104 | $r = TRUE; 105 | 106 | return $r; 107 | } 108 | 109 | # $r, $g, $b between 0 and 255. 110 | # Return HSV, all between 0.0 and 1.0 111 | # XXX - Room here to remove "/255." from the code. 112 | function calculate_HSV($r, $g, $b) 113 | { 114 | $r = (float)$r/255.; 115 | $g = (float)$g/255.; 116 | $b = (float)$b/255.; 117 | 118 | # XXX - not the RGB -> HSV algorithm appearing in 119 | # Ap-Apid's paper. That one just doesn't work. 
120 | $alpha = ($r + $r - $g - $b)/2.; 121 | $beta = 0.86602540378 * ($g - $b); 122 | $H = atan2($beta, $alpha); # $H in radians, -Pi to Pi 123 | if ($H < 0.0) $H += 2*M_PI; 124 | $H /= (2*M_PI); // $H now a fraction of a full turn, 0.0 to 1.0 125 | $C = sqrt($alpha*$alpha + $beta*$beta); 126 | $V = max($r, $g, $b); 127 | if ($V == 0.0) 128 | $S = 0; 129 | else 130 | $S = $C/$V; 131 | 132 | return array($H, $S, $V); 133 | } 134 | -------------------------------------------------------------------------------- /region_colors.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | map_skin_pixels(); 13 | echo "determine regions\n"; 14 | $detector->determine_regions(); 15 | echo "count region populations\n"; 16 | $detector->count_region_population(); 17 | $detector->sort_regions_by_population(); 18 | 19 | echo "create colored regions\n"; 20 | $img = $detector->create_colored_regions(); 21 | 22 | imagegif($img, $argv[2]); 23 | imagedestroy($img); 24 | 25 | -------------------------------------------------------------------------------- /skin_maps.php: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | map_skin_pixels(); 15 | $ycbcr->map_skin_pixels(); 16 | 17 | $img = $hsv->skin_map; 18 | imagegif($img, $prefix .'HSV.gif'); 19 | $img = $ycbcr->skin_map; 20 | imagegif($img, $prefix .'YCbCr.gif'); 21 | --------------------------------------------------------------------------------