├── .all-contributorsrc ├── .github └── workflows │ └── release.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── _art ├── nsfw_confusion93.png └── nsfw_detection.png ├── images └── ReadMe.md ├── nsfw_detector ├── __init__.py └── predict.py ├── requirements.txt ├── setup.py ├── tf1 ├── ReadMe.md ├── nsfw_detector │ ├── __init__.py │ └── keras_predict.py └── training │ ├── inceptionv3_transfer │ ├── callbacks.py │ ├── constants.py │ ├── generators.py │ ├── train_fine_tune.py │ └── train_initialization.py │ ├── mobilenetv2_transfer │ ├── callbacks.py │ ├── constants.py │ ├── generators.py │ ├── train_fine_tune.py │ └── train_initialization.py │ ├── self_clense.py │ └── visuals.py └── training ├── make_nsfw_model.py ├── make_nsfw_model_lib.py ├── train_all_models.cmd └── train_all_models.sh /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "projectName": "nsfw_model", 3 | "projectOwner": "GantMan", 4 | "files": [ 5 | "README.md" 6 | ], 7 | "imageSize": 100, 8 | "commit": true, 9 | "contributors": [ 10 | { 11 | "login": "GantMan", 12 | "name": "Gant Laborde", 13 | "avatar_url": "https://avatars0.githubusercontent.com/u/997157?v=4", 14 | "profile": "http://gantlaborde.com/", 15 | "contributions": [ 16 | "code", 17 | "doc", 18 | "ideas" 19 | ] 20 | }, 21 | { 22 | "login": "bedapudi6788", 23 | "name": "Bedapudi Praneeth", 24 | "avatar_url": "https://avatars2.githubusercontent.com/u/15898654?v=4", 25 | "profile": "http://bpraneeth.com", 26 | "contributions": [ 27 | "code", 28 | "ideas" 29 | ] 30 | }, 31 | { 32 | "login": "TechnikEmpire", 33 | "name": "Jesse Nicholson", 34 | "avatar_url": "https://avatars0.githubusercontent.com/u/11234763?v=4", 35 | "profile": "http://bpraneeth.com", 36 | "contributions": [ 37 | "code", 38 | "doc", 39 | "ideas" 40 | ] 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and release packages 2 | 3 | on: 4 | pull_request: 5 | push: 6 | tags: 7 | - "*" 8 | 9 | jobs: 10 | # Build the source distribution for PyPI 11 | build_packages: 12 | name: Build packages 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: "3.9" 21 | 22 | - name: Build 23 | run: | 24 | python3 -m pip install --upgrade build 25 | python3 -m build 26 | 27 | - uses: actions/upload-artifact@v3 28 | with: 29 | path: dist/*.tar.gz 30 | 31 | # Create a GitHub release 32 | github_release: 33 | name: Create GitHub release 34 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 35 | needs: [build_packages] 36 | runs-on: ubuntu-latest 37 | permissions: 38 | contents: write 39 | 40 | steps: 41 | - uses: actions/checkout@v3 42 | with: 43 | fetch-depth: 0 44 | 45 | - uses: actions/download-artifact@v3 46 | with: 47 | name: artifact 48 | path: dist 49 | 50 | - name: "✏️ Generate release changelog" 51 | id: changelog 52 | uses: heinrichreimer/github-changelog-generator-action@v2.3 53 | with: 54 | filterByMilestone: false 55 | onlyLastTag: true 56 | pullRequests: true 57 | prWoLabels: true 58 | token: ${{ secrets.GITHUB_TOKEN }} 59 | verbose: true 60 | 61 | - name: Create GitHub release 62 | uses: softprops/action-gh-release@v1 63 | with: 64 | body: ${{ steps.changelog.outputs.changelog }} 65 | files: dist/**/* 66 | 67 | # Test PyPI 68 
| test_pypi_publish: 69 | name: Test publishing to PyPI 70 | needs: [build_packages] 71 | runs-on: ubuntu-latest 72 | 73 | steps: 74 | - uses: actions/download-artifact@v3 75 | with: 76 | name: artifact 77 | path: dist 78 | 79 | - uses: pypa/gh-action-pypi-publish@v1.6.4 80 | with: 81 | user: __token__ 82 | password: ${{ secrets.TEST_PYPI_TOKEN }} 83 | repository_url: https://test.pypi.org/legacy/ 84 | skip_existing: true 85 | 86 | # Publish to PyPI 87 | pypi_publish: 88 | name: Publish to PyPI 89 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 90 | needs: [build_packages] 91 | runs-on: ubuntu-latest 92 | 93 | steps: 94 | - uses: actions/download-artifact@v3 95 | with: 96 | name: artifact 97 | path: dist 98 | 99 | - uses: pypa/gh-action-pypi-publish@v1.6.4 100 | with: 101 | user: __token__ 102 | password: ${{ secrets.PYPI_TOKEN }} 103 | print_hash: true 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.hdf5 2 | *.h5 3 | logs/ 4 | .vscode/ 5 | __pycache__ 6 | dist 7 | build 8 | *.egg-info 9 | images/** 10 | !images/ReadMe.md 11 | # Ignore my Visual Studio solution for editing python 12 | NSFWModel/** 13 | trained_models/** 14 | training/train_best_models.cmd 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ### Changed 11 | 12 | - Require Python 3.8+ 13 | - Require Tensorflow 2.2+ for non-macOS 14 | - Require Tensorflow 2.5+ for macOS via tensorflow_macos 15 | 16 | ### Fixed 17 | 18 | - Declares numpy dependency explicitly 19 | 20 | ### Removed 21 | 22 | - Dropped support for Python 3.7 and older 23 | - 3.7 is EOL in a few months and all others are already EOL 24 | 25 | ## [1.1.1] - 2021-12-26 26 | 27 | ### Changed 28 | 29 | - break out numpy (nd array) function 30 | - remove classic app run modes for argparse 31 | - turn down verbosity in image load via file 32 | 33 | ### Added 34 | 35 | - one more example in README for running 36 | 37 | ### Fixed 38 | 39 | - fix requirements for clean system (needs PIL) 40 | 41 | ## [1.2.0] - 2020-05-15 42 | 43 | ### Added 44 | 45 | - New model release 46 | 47 | ## [1.1.0] - 2020-03-03 48 | 49 | ### Changed 50 | 51 | - update to tensorflow 2.1.0 and updated mobilenet-based model 52 | 53 | ## [1.0.0] - 2019-04-04 54 | 55 | ### Added 56 | 57 | - initial creation 58 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This project contains third-party copyrighted material under different licenses. 
2 | Except where otherwise explicitly stated, this project is licensed as follows: 3 | 4 | MIT License 5 | 6 | Copyright (c) 2020 The nsfw_model Developers 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![NSFW Detector logo](https://github.com/GantMan/nsfw_model/blob/master/_art/nsfw_detection.png?raw=true) 2 | 3 | # NSFW Detection Machine Learning Model 4 | 5 | [![All Contributors](https://img.shields.io/badge/all_contributors-2-orange.svg?style=flat-square)](#contributors) 6 | 7 | Trained on 60+ Gigs of data to identify: 8 | - `drawings` - safe for work drawings (including anime) 9 | - `hentai` - hentai and pornographic drawings 10 | - `neutral` - safe for work neutral images 11 | - `porn` - pornographic images, sexual acts 12 | - `sexy` - sexually explicit images, not pornography 13 | 14 | This model powers [NSFW JS](https://github.com/infinitered/nsfwjs) - [More Info](https://shift.infinite.red/avoid-nightmares-nsfw-js-ab7b176978b1) 15 | 16 | ## Current Status: 17 | 18 | 93% Accuracy with the following confusion matrix, based on Inception V3. 19 | ![nsfw confusion matrix](_art/nsfw_confusion93.png) 20 | 21 | ## Requirements: 22 | 23 | See [requirements.txt](requirements.txt). 24 | 25 | ## Usage 26 | 27 | For programmatic use of the library. 28 | 29 | ```python 30 | from nsfw_detector import predict 31 | model = predict.load_model('./nsfw_mobilenet2.224x224.h5') 32 | 33 | # Predict single image 34 | predict.classify(model, '2.jpg') 35 | # {'2.jpg': {'sexy': 4.3454722e-05, 'neutral': 0.00026579265, 'porn': 0.0007733492, 'hentai': 0.14751932, 'drawings': 0.85139805}} 36 | 37 | # Predict multiple images at once 38 | predict.classify(model, ['/Users/bedapudi/Desktop/2.jpg', '/Users/bedapudi/Desktop/6.jpg']) 39 | # {'2.jpg': {'sexy': 4.3454795e-05, 'neutral': 0.00026579312, 'porn': 0.0007733498, 'hentai': 0.14751942, 'drawings': 0.8513979}, '6.jpg': {'drawings': 0.004214506, 'hentai': 0.013342537, 'neutral': 0.01834045, 'porn': 0.4431829, 'sexy': 0.5209196}} 40 | 41 | # Predict for all images in a directory 42 | predict.classify(model, '/Users/bedapudi/Desktop/') 43 | 44 | ``` 45 | 46 | If you've installed the package or use the command-line this should work, too... 
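(`nsfw-predict` is the console entry point registered in `setup.py`; in the examples, `mobilenet_v2_140_224` stands in for the path of a downloaded SavedModel directory.)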
47 | 48 | ```sh 49 | # a single image 50 | nsfw-predict --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 51 | 52 | # an image directory 53 | nsfw-predict --saved_model_path mobilenet_v2_140_224 --image_source images 54 | 55 | # a single image (from code/CLI) 56 | python3 nsfw_detector/predict.py --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 57 | 58 | ``` 59 | 60 | 61 | ## Download 62 | Please feel free to use this model to help your products! 63 | 64 | If you'd like to [say thanks for creating this, I'll take a donation for hosting costs](https://www.paypal.me/GantLaborde). 65 | 66 | # Latest Models Zip (v1.1.0) 67 | https://github.com/GantMan/nsfw_model/releases/tag/1.1.0 68 | 69 | ### Original Inception v3 Model (v1.0) 70 | * [Keras 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.h5) 71 | * [TensorflowJS 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfwjs.zip) 72 | * [TensorflowJS Quantized 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/min_nsfwjs.zip) 73 | * [Tensorflow 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 74 | 75 | ### Original Mobilenet v2 Model (v1.0) 76 | * [Keras 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/nsfw_mobilenet2.224x224.h5) 77 | * [TensorflowJS 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TFJS_nsfw_mobilenet/tfjs_nsfw_mobilenet.zip) 78 | * [TensorflowJS Quantized 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TFJS_nsfw_mobilenet/tfjs_quant_nsfw_mobilenet.zip) 79 | * [Tensorflow 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TF_nsfw_mobilenet/nsfw_mobilenet.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 80 | * [Tensorflow Quantized 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TF_nsfw_mobilenet/quant_nsfw_mobilenet.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 81 | 82 | ## PyTorch Version 83 | Kudos to the community for creating a PyTorch version with resnet! 84 | https://github.com/yangbisheng2009/nsfw-resnet 85 | 86 | ## TF1 Training Folder Contents 87 | Simple description of the scripts used to create this model: 88 | * `inceptionv3_transfer/` - Folder with all the code to train the Keras based Inception v3 transfer learning model. Includes `constants.py` for configuration, and two scripts for actual training/refinement. 89 | * `mobilenetv2_transfer/` - Folder with all the code to train the Keras based Mobilenet v2 transfer learning model. 90 | * `visuals.py` - The code to create the confusion matrix graphic 91 | * `self_clense.py` - If the training data has significant inaccuracy, `self_clense` helps cross validate errors in the training data in reasonable time. The better the model gets, the better you can use it to clean the training data manually. 
92 | 93 | _e.g._ 94 | ```bash 95 | cd training 96 | # Start with all locked transfer of Inception v3 97 | python inceptionv3_transfer/train_initialization.py 98 | 99 | # Continue training on model with fine-tuning 100 | python inceptionv3_transfer/train_fine_tune.py 101 | 102 | # Create a confusion matrix of the model 103 | python visuals.py 104 | ``` 105 | 106 | ## Extra Info 107 | There's no easy way to distribute the training data, but if you'd like to help with this model or train other models, get in touch with me and we can work together. 108 | 109 | Advancements in this model power the quantized TFJS module on https://nsfwjs.com/ 110 | 111 | My Twitter is [@GantLaborde](https://twitter.com/GantLaborde) - I'm a School Of AI Wizard New Orleans. I run the twitter account [@FunMachineLearn](https://twitter.com/FunMachineLearn) 112 | 113 | Learn more about [me](http://gantlaborde.com/) and the [company I work for](https://infinite.red/). 114 | 115 | Special thanks to the [nsfw_data_scraper](https://github.com/alexkimxyz/nsfw_data_scrapper) for the training data. If you're interested in a more detailed analysis of types of NSFW images, you could probably use this repo code with [this data](https://github.com/EBazarov/nsfw_data_source_urls). 116 | 117 | If you need React Native, Elixir, AI, or Machine Learning work, check in with us at [Infinite Red](https://infinite.red/), who make all these experiments possible. We're an amazing software consultancy worldwide! 118 | 119 | ## Cite 120 | ``` 121 | @misc{man, 122 | title={Deep NN for NSFW Detection}, 123 | url={https://github.com/GantMan/nsfw_model}, 124 | journal={GitHub}, 125 | author={Laborde, Gant}} 126 | ``` 127 | 128 | ## Contributors 129 | 130 | Thanks goes to these wonderful people ([emoji key](https://github.com/kentcdodds/all-contributors#emoji-key)): 131 | 132 | 133 | | [
<img src="https://avatars0.githubusercontent.com/u/997157?v=4" width="100px;"/><br /><sub><b>Gant Laborde</b></sub>](http://gantlaborde.com/)<br />[💻](https://github.com/GantMan/nsfw_model/commits?author=GantMan "Code") [📖](https://github.com/GantMan/nsfw_model/commits?author=GantMan "Documentation") [🤔](#ideas-GantMan "Ideas, Planning, & Feedback") | [<img src="https://avatars2.githubusercontent.com/u/15898654?v=4" width="100px;"/><br /><sub><b>Bedapudi Praneeth</b></sub>](http://bpraneeth.com)<br />
[💻](https://github.com/GantMan/nsfw_model/commits?author=bedapudi6788 "Code") [🤔](#ideas-bedapudi6788 "Ideas, Planning, & Feedback") | 134 | | :---: | :---: | 135 | 136 | 137 | This project follows the [all-contributors](https://github.com/kentcdodds/all-contributors) specification. Contributions of any kind welcome! 138 | -------------------------------------------------------------------------------- /_art/nsfw_confusion93.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GantMan/nsfw_model/699b6796a55604341fbfdffe2b27ced1d868c591/_art/nsfw_confusion93.png -------------------------------------------------------------------------------- /_art/nsfw_detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GantMan/nsfw_model/699b6796a55604341fbfdffe2b27ced1d868c591/_art/nsfw_detection.png -------------------------------------------------------------------------------- /images/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Images Setup 2 | 3 | Place a folder of images here for each class you want. The training scripts will automatically separate train/test/validation from each 4 | folder and will generate a new label for each folder. The labels will be written to the output model directory. Consult the .cmd and .sh training 5 | scripts in the training folder for full usage in the event that you are invoking the python scripts directly. 6 | 7 | You should pre-resize your images to the target network input size. Otherwise, your training times will be increased by several hours due to the 8 | preprocessing expense and gaining nothing from it. 9 | 10 | *nix users can just make symbolic links for each class here to avoid copying. 11 | 12 | Windows users can, with an admin command prompt, create symbolic links as well with the MKLINK command. Like so: 13 | 14 | `mklink /J link_name C:\real\folder\path` -------------------------------------------------------------------------------- /nsfw_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # empty file for package import -------------------------------------------------------------------------------- /nsfw_detector/predict.py: -------------------------------------------------------------------------------- 1 | #! 
python 2 | 3 | import argparse 4 | import json 5 | from os import listdir 6 | from os.path import isfile, join, exists, isdir, abspath 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow import keras 11 | import tensorflow_hub as hub 12 | 13 | 14 | IMAGE_DIM = 224 # required/default image dimensionality 15 | 16 | def load_images(image_paths, image_size, verbose=True): 17 | ''' 18 | Function for loading images into numpy arrays for passing to model.predict 19 | inputs: 20 | image_paths: list of image paths to load 21 | image_size: size into which images should be resized 22 | verbose: show all of the image path and sizes loaded 23 | 24 | outputs: 25 | loaded_images: loaded images on which keras model can run predictions 26 | loaded_image_indexes: paths of images which the function is able to process 27 | 28 | ''' 29 | loaded_images = [] 30 | loaded_image_paths = [] 31 | 32 | if isdir(image_paths): 33 | parent = abspath(image_paths) 34 | image_paths = [join(parent, f) for f in listdir(image_paths) if isfile(join(parent, f))] 35 | elif isfile(image_paths): 36 | image_paths = [image_paths] 37 | 38 | for img_path in image_paths: 39 | try: 40 | if verbose: 41 | print(img_path, "size:", image_size) 42 | image = keras.preprocessing.image.load_img(img_path, target_size=image_size) 43 | image = keras.preprocessing.image.img_to_array(image) 44 | image /= 255 45 | loaded_images.append(image) 46 | loaded_image_paths.append(img_path) 47 | except Exception as ex: 48 | print("Image Load Failure: ", img_path, ex) 49 | 50 | return np.asarray(loaded_images), loaded_image_paths 51 | 52 | def load_model(model_path): 53 | if model_path is None or not exists(model_path): 54 | raise ValueError("saved_model_path must be the valid directory of a saved model to load.") 55 | 56 | model = tf.keras.models.load_model(model_path, custom_objects={'KerasLayer': hub.KerasLayer},compile=False) 57 | return model 58 | 59 | 60 | def classify(model, input_paths, image_dim=IMAGE_DIM, predict_args={}): 61 | """ 62 | Classify given a model, input paths (could be single string), and image dimensionality. 63 | 64 | Optionally, pass predict_args that will be passed to tf.keras.Model.predict(). 65 | """ 66 | images, image_paths = load_images(input_paths, (image_dim, image_dim)) 67 | probs = classify_nd(model, images, predict_args) 68 | return dict(zip(image_paths, probs)) 69 | 70 | 71 | def classify_nd(model, nd_images, predict_args={}): 72 | """ 73 | Classify given a model, image array (numpy) 74 | 75 | Optionally, pass predict_args that will be passed to tf.keras.Model.predict(). 
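
    nd_images is expected to be a float array of shape (N, dim, dim, 3)
    scaled to [0, 1], as produced by load_images above.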
76 | """ 77 | model_preds = model.predict(nd_images, **predict_args) 78 | # preds = np.argsort(model_preds, axis = 1).tolist() 79 | 80 | categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 81 | 82 | probs = [] 83 | for i, single_preds in enumerate(model_preds): 84 | single_probs = {} 85 | for j, pred in enumerate(single_preds): 86 | single_probs[categories[j]] = float(pred) 87 | probs.append(single_probs) 88 | return probs 89 | 90 | 91 | def main(args=None): 92 | parser = argparse.ArgumentParser( 93 | description="""A script to perform NFSW classification of images""", 94 | epilog=""" 95 | Launch with default model and a test image 96 | python nsfw_detector/predict.py --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 97 | """, formatter_class=argparse.RawTextHelpFormatter) 98 | 99 | submain = parser.add_argument_group('main execution and evaluation functionality') 100 | submain.add_argument('--image_source', dest='image_source', type=str, required=True, 101 | help='A directory of images or a single image to classify') 102 | submain.add_argument('--saved_model_path', dest='saved_model_path', type=str, required=True, 103 | help='The model to load') 104 | submain.add_argument('--image_dim', dest='image_dim', type=int, default=IMAGE_DIM, 105 | help="The square dimension of the model's input shape") 106 | if args is not None: 107 | config = vars(parser.parse_args(args)) 108 | else: 109 | config = vars(parser.parse_args()) 110 | 111 | if config['image_source'] is None or not exists(config['image_source']): 112 | raise ValueError("image_source must be a valid directory with images or a single image to classify.") 113 | 114 | model = load_model(config['saved_model_path']) 115 | image_preds = classify(model, config['image_source'], config['image_dim']) 116 | print(json.dumps(image_preds, indent=2), '\n') 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.2.0;sys_platform != 'darwin' 2 | tensorflow_macos>=2.5.0;sys_platform == 'darwin' 3 | tensorflow-hub==0.12.0 4 | pillow 5 | 6 | numpy -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pip install twine 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'nsfw_detector' 16 | DESCRIPTION = 'NSFW Image Detection with Deep Learning' 17 | URL = 'https://github.com/GantMan/nsfw_model' 18 | EMAIL = 'gantman@gmail.com' 19 | AUTHOR = 'Gant Laborde' 20 | REQUIRES_PYTHON = '>=3.8.0' 21 | VERSION = '1.3.0' 22 | 23 | # What packages are optional? 24 | EXTRAS = { 25 | # 'fancy feature': ['django'], 26 | } 27 | 28 | # The rest you shouldn't have to touch too much :) 29 | # ------------------------------------------------ 30 | # Except, perhaps the License and Trove Classifiers! 31 | # If you do change the License, remember to change the Trove Classifier for that! 32 | 33 | here = os.path.abspath(os.path.dirname(__file__)) 34 | 35 | # Import the requirements. 
36 | REQUIRED = [] 37 | try: 38 | with io.open(os.path.join(here, 'requirements.txt'), encoding='utf-8') as f: 39 | for line_req in f: 40 | if line_req[0] != '#': 41 | REQUIRED.append(line_req.strip()) 42 | except FileNotFoundError: 43 | REQUIRED = [] 44 | 45 | # Import the README and use it as the long-description. 46 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 47 | try: 48 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 49 | long_description = '\n' + f.read() 50 | with io.open(os.path.join(here, 'CHANGELOG.md'), encoding='utf-8') as f: 51 | long_description = '\n' + f.read() 52 | except FileNotFoundError: 53 | long_description = DESCRIPTION 54 | 55 | # Load the package's __version__.py module as a dictionary. 56 | about = {} 57 | if not VERSION: 58 | with open(os.path.join(here, NAME, '__version__.py')) as f: 59 | exec(f.read(), about) 60 | else: 61 | about['__version__'] = VERSION 62 | 63 | 64 | class UploadCommand(Command): 65 | """Support setup.py upload.""" 66 | 67 | description = 'Build and publish the package.' 68 | user_options = [] 69 | 70 | @staticmethod 71 | def status(s): 72 | """Prints things in bold.""" 73 | print('\033[1m{0}\033[0m'.format(s)) 74 | 75 | def initialize_options(self): 76 | pass 77 | 78 | def finalize_options(self): 79 | pass 80 | 81 | def run(self): 82 | try: 83 | self.status('Removing previous builds…') 84 | rmtree(os.path.join(here, 'dist')) 85 | except OSError: 86 | pass 87 | 88 | self.status('Building Source and Wheel (universal) distribution…') 89 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 90 | 91 | self.status('Uploading the package to PyPI via Twine…') 92 | os.system('twine upload dist/*') 93 | 94 | self.status('Pushing git tags…') 95 | os.system('git tag v{0}'.format(about['__version__'])) 96 | os.system('git push --tags') 97 | 98 | sys.exit() 99 | 100 | 101 | # Where the magic happens: 102 | setup( 103 | name=NAME, 104 | version=about['__version__'], 105 | description=DESCRIPTION, 106 | long_description=long_description, 107 | long_description_content_type='text/markdown', 108 | author=AUTHOR, 109 | author_email=EMAIL, 110 | python_requires=REQUIRES_PYTHON, 111 | url=URL, 112 | packages=find_packages(exclude=('tests',)), 113 | # If your package is a single module, use this instead of 'packages': 114 | # py_modules=['mypackage'], 115 | 116 | # entry_points={ 117 | # 'console_scripts': ['mycli=mymodule:cli'], 118 | # }, 119 | install_requires=REQUIRED, 120 | extras_require=EXTRAS, 121 | include_package_data=True, 122 | license='MIT', 123 | classifiers=[ 124 | # Trove classifiers 125 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 126 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 127 | 'Programming Language :: Python', 128 | 'Programming Language :: Python :: 3', 129 | 'Programming Language :: Python :: 3.8', 130 | 'Programming Language :: Python :: 3.9', 131 | 'Programming Language :: Python :: 3.10', 132 | 'Programming Language :: Python :: 3.11', 133 | 'Programming Language :: Python :: Implementation :: CPython', 134 | 'Programming Language :: Python :: Implementation :: PyPy' 135 | ], 136 | # $ setup.py publish support. 
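    # `python setup.py upload` runs UploadCommand above: it rebuilds the
    # distributions, uploads them via twine, then tags and pushes the version.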
137 | cmdclass={ 138 | 'upload': UploadCommand 139 | }, 140 | entry_points=""" 141 | [console_scripts] 142 | nsfw-predict=nsfw_detector.predict:main 143 | """ 144 | ) 145 | -------------------------------------------------------------------------------- /tf1/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Deprecated Tensorflow 1.x Version 2 | 3 | This version of the nsfw_model training and validation code depends on Tensorflow 1.x and is deprecated. -------------------------------------------------------------------------------- /tf1/nsfw_detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .keras_predict import keras_predictor as NSFWDetector -------------------------------------------------------------------------------- /tf1/nsfw_detector/keras_predict.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | 4 | 5 | def load_images(image_paths, image_size): 6 | ''' 7 | Function for loading images into numpy arrays for passing to model.predict 8 | inputs: 9 | image_paths: list of image paths to load 10 | image_size: size into which images should be resized 11 | 12 | outputs: 13 | loaded_images: loaded images on which keras model can run predictions 14 | loaded_image_indexes: paths of images which the function is able to process 15 | 16 | ''' 17 | loaded_images = [] 18 | loaded_image_paths = [] 19 | 20 | for i, img_path in enumerate(image_paths): 21 | try: 22 | image = keras.preprocessing.image.load_img(img_path, target_size = image_size) 23 | image = keras.preprocessing.image.img_to_array(image) 24 | image /= 255 25 | loaded_images.append(image) 26 | loaded_image_paths.append(img_path) 27 | except Exception as ex: 28 | print(i, img_path, ex) 29 | 30 | return np.asarray(loaded_images), loaded_image_paths 31 | 32 | class keras_predictor(): 33 | ''' 34 | Class for loading model and running predictions. 35 | For example on how to use take a look the if __name__ == '__main__' part. 
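
    A minimal sketch:

        m = keras_predictor('./nsfw.299x299.h5')
        m.predict('test.jpg')  # a single path or a list of paths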
36 | ''' 37 | nsfw_model = None 38 | 39 | def __init__(self, model_path): 40 | ''' 41 | model = keras_predictor('path_to_weights') 42 | ''' 43 | keras_predictor.nsfw_model = keras.models.load_model(model_path) 44 | 45 | 46 | def predict(self, image_paths = [], batch_size = 32, image_size = (299, 299), categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy']): 47 | ''' 48 | inputs: 49 | image_paths: list of image paths or can be a string too (for single image) 50 | batch_size: batch_size for running predictions 51 | image_size: size to which the image needs to be resized 52 | categories: since the model predicts numbers, categories is the list of actual names of categories 53 | ''' 54 | if isinstance(image_paths, str): 55 | image_paths = [image_paths] 56 | 57 | loaded_images, loaded_image_paths = load_images(image_paths, image_size) 58 | 59 | if not loaded_image_paths: 60 | return {} 61 | 62 | model_preds = keras_predictor.nsfw_model.predict(loaded_images, batch_size = batch_size) 63 | 64 | preds = np.argsort(model_preds, axis = 1).tolist() 65 | 66 | probs = [] 67 | for i, single_preds in enumerate(preds): 68 | single_probs = [] 69 | for j, pred in enumerate(single_preds): 70 | single_probs.append(model_preds[i][pred]) 71 | preds[i][j] = categories[pred] 72 | 73 | probs.append(single_probs) 74 | 75 | 76 | images_preds = {} 77 | 78 | for i, loaded_image_path in enumerate(loaded_image_paths): 79 | images_preds[loaded_image_path] = {} 80 | for _ in range(len(preds[i])): 81 | images_preds[loaded_image_path][preds[i][_]] = probs[i][_] 82 | 83 | return images_preds 84 | 85 | 86 | if __name__ == '__main__': 87 | print('\n Enter path for the keras weights, leave empty to use "./nsfw.299x299.h5" \n') 88 | weights_path = input().strip() 89 | if not weights_path: weights_path = "../nsfw.299x299.h5" 90 | 91 | m = keras_predictor(weights_path) 92 | 93 | while 1: 94 | print('\n Enter single image path or multiple images seperated by || (2 pipes) \n') 95 | images = input().split('||') 96 | images = [image.strip() for image in images] 97 | print(m.predict(images), '\n') 98 | -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/callbacks.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler 2 | from time import time 3 | 4 | # Slow down training deeper into dataset 5 | def schedule(epoch): 6 | if epoch < 6: 7 | # Warmup model first 8 | return .0000032 9 | elif epoch < 12: 10 | return .01 11 | elif epoch < 20: 12 | return .002 13 | elif epoch < 40: 14 | return .0004 15 | elif epoch < 60: 16 | return .00008 17 | elif epoch < 80: 18 | return .000016 19 | elif epoch < 95: 20 | return .0000032 21 | else: 22 | return .0000009 23 | 24 | 25 | def make_callbacks(weights_file): 26 | # checkpoint 27 | filepath = weights_file 28 | checkpoint = ModelCheckpoint( 29 | filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 30 | 31 | # Update info 32 | tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 33 | 34 | # learning rate schedule 35 | lr_scheduler = LearningRateScheduler(schedule) 36 | 37 | # all the goodies 38 | return [lr_scheduler, checkpoint, tensorboard] -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/constants.py: -------------------------------------------------------------------------------- 1 | # Config 2 | SIZES = { 3 | 'basic': 299 4 
| } 5 | 6 | NUM_CHANNELS = 3 7 | NUM_CLASSES = 5 8 | GENERATOR_BATCH_SIZE = 32 9 | TOTAL_EPOCHS = 100 10 | STEPS_PER_EPOCH = 500 11 | VALIDATION_STEPS = 50 12 | BASE_DIR = 'D:\\nswf_model_training_data\\data' -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/generators.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | import constants 4 | 5 | train_datagen = ImageDataGenerator( 6 | rescale=1./255, 7 | rotation_range=30, 8 | width_shift_range=0.2, 9 | height_shift_range=0.2, 10 | shear_range=0.2, 11 | zoom_range=0.2, 12 | channel_shift_range=20, 13 | horizontal_flip=True, 14 | fill_mode='nearest' 15 | ) 16 | 17 | # Validation data should not be modified 18 | validation_datagen = ImageDataGenerator( 19 | rescale=1./255 20 | ) 21 | 22 | train_dir = os.path.join(constants.BASE_DIR, 'train') 23 | test_dir = os.path.join(constants.BASE_DIR, 'test') 24 | 25 | def create_generators(height, width): 26 | train_generator = train_datagen.flow_from_directory( 27 | train_dir, 28 | target_size=(height, width), 29 | class_mode='categorical', 30 | batch_size=constants.GENERATOR_BATCH_SIZE 31 | ) 32 | 33 | validation_generator = validation_datagen.flow_from_directory( 34 | test_dir, 35 | target_size=(height, width), 36 | class_mode='categorical', 37 | batch_size=constants.GENERATOR_BATCH_SIZE 38 | ) 39 | 40 | return[train_generator, validation_generator] -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/train_fine_tune.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.models import Sequential, Model, load_model 7 | 8 | # reusable stuff 9 | import constants 10 | import callbacks 11 | import generators 12 | 13 | # No kruft plz 14 | clear_session() 15 | 16 | # Config 17 | height = constants.SIZES['basic'] 18 | width = height 19 | weights_file = "weights.best_inception" + str(height) + ".hdf5" 20 | 21 | print ('Starting from last full model run') 22 | model = load_model("nsfw." 
+ str(width) + "x" + str(height) + ".h5") 23 | 24 | # Unlock a few layers deep in Inception v3 25 | model.trainable = False 26 | set_trainable = False 27 | for layer in model.layers: 28 | if layer.name == 'conv2d_56': 29 | set_trainable = True 30 | if set_trainable: 31 | layer.trainable = True 32 | else: 33 | layer.trainable = False 34 | 35 | # Let's see it 36 | print('Summary') 37 | print(model.summary()) 38 | 39 | # Load checkpoint if one is found 40 | if os.path.exists(weights_file): 41 | print ("loading ", weights_file) 42 | model.load_weights(weights_file) 43 | 44 | # Get all model callbacks 45 | callbacks_list = callbacks.make_callbacks(weights_file) 46 | 47 | print('Compile model') 48 | opt = SGD(momentum=.9) 49 | model.compile( 50 | loss='categorical_crossentropy', 51 | optimizer=opt, 52 | metrics=['accuracy'] 53 | ) 54 | 55 | # Get training/validation data via generators 56 | train_generator, validation_generator = generators.create_generators(height, width) 57 | 58 | print('Start training!') 59 | history = model.fit_generator( 60 | train_generator, 61 | callbacks=callbacks_list, 62 | epochs=constants.TOTAL_EPOCHS, 63 | steps_per_epoch=constants.STEPS_PER_EPOCH, 64 | shuffle=True, 65 | workers=4, 66 | use_multiprocessing=False, 67 | validation_data=validation_generator, 68 | validation_steps=constants.VALIDATION_STEPS 69 | ) 70 | 71 | # Save it for later 72 | print('Saving Model') 73 | model.save("nsfw." + str(width) + "x" + str(height) + ".h5") 74 | -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/train_initialization.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.applications import InceptionV3 7 | from keras.models import Sequential, Model, load_model 8 | from keras.layers import Dense, Dropout, Flatten, AveragePooling2D 9 | from keras import initializers, regularizers 10 | 11 | # reusable stuff 12 | import constants 13 | import callbacks 14 | import generators 15 | 16 | # No kruft plz 17 | clear_session() 18 | 19 | # Config 20 | height = constants.SIZES['basic'] 21 | width = height 22 | weights_file = "weights.best_inception" + str(height) + ".hdf5" 23 | 24 | conv_base = InceptionV3( 25 | weights='imagenet', 26 | include_top=False, 27 | input_shape=(height, width, constants.NUM_CHANNELS) 28 | ) 29 | 30 | # First time run, no unlocking 31 | conv_base.trainable = False 32 | 33 | # Let's see it 34 | print('Summary') 35 | print(conv_base.summary()) 36 | 37 | # Let's construct that top layer replacement 38 | x = conv_base.output 39 | x = AveragePooling2D(pool_size=(8, 8))(x) 40 | x - Dropout(0.4)(x) 41 | x = Flatten()(x) 42 | x = Dense(256, activation='relu', kernel_initializer=initializers.he_normal(seed=None), kernel_regularizer=regularizers.l2(.0005))(x) 43 | x = Dropout(0.5)(x) 44 | # Essential to have another layer for better accuracy 45 | x = Dense(128,activation='relu', kernel_initializer=initializers.he_normal(seed=None))(x) 46 | x = Dropout(0.25)(x) 47 | predictions = Dense(constants.NUM_CLASSES, kernel_initializer="glorot_uniform", activation='softmax')(x) 48 | 49 | print('Stacking New Layers') 50 | model = Model(inputs = conv_base.input, outputs=predictions) 51 | 52 | # Load checkpoint if one is found 53 | if os.path.exists(weights_file): 54 | print ("loading ", 
weights_file) 55 | model.load_weights(weights_file) 56 | 57 | # Get all model callbacks 58 | callbacks_list = callbacks.make_callbacks(weights_file) 59 | 60 | print('Compile model') 61 | # originally adam, but research says SGD with scheduler 62 | # opt = Adam(lr=0.001, amsgrad=True) 63 | opt = SGD(momentum=.9) 64 | model.compile( 65 | loss='categorical_crossentropy', 66 | optimizer=opt, 67 | metrics=['accuracy'] 68 | ) 69 | 70 | # Get training/validation data via generators 71 | train_generator, validation_generator = generators.create_generators(height, width) 72 | 73 | print('Start training!') 74 | history = model.fit_generator( 75 | train_generator, 76 | callbacks=callbacks_list, 77 | epochs=constants.TOTAL_EPOCHS, 78 | steps_per_epoch=constants.STEPS_PER_EPOCH, 79 | shuffle=True, 80 | workers=4, 81 | use_multiprocessing=False, 82 | validation_data=validation_generator, 83 | validation_steps=constants.VALIDATION_STEPS 84 | ) 85 | 86 | # Save it for later 87 | print('Saving Model') 88 | model.save("nsfw." + str(width) + "x" + str(height) + ".h5") 89 | -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/callbacks.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler 2 | from time import time 3 | 4 | # Slow down training deeper into dataset 5 | def schedule(epoch): 6 | if epoch < 6: 7 | # Warmup model first 8 | return .0000032 9 | elif epoch < 12: 10 | return .01 11 | elif epoch < 20: 12 | return .002 13 | elif epoch < 40: 14 | return .0004 15 | elif epoch < 60: 16 | return .00008 17 | elif epoch < 80: 18 | return .000016 19 | elif epoch < 95: 20 | return .0000032 21 | else: 22 | return .0000009 23 | 24 | 25 | def make_callbacks(weights_file): 26 | # checkpoint 27 | filepath = weights_file 28 | checkpoint = ModelCheckpoint( 29 | filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 30 | 31 | # Update info 32 | tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 33 | 34 | # learning rate schedule 35 | lr_scheduler = LearningRateScheduler(schedule) 36 | 37 | # all the goodies 38 | return [lr_scheduler, checkpoint, tensorboard] -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/constants.py: -------------------------------------------------------------------------------- 1 | # Config 2 | SIZES = { 3 | 'basic': 224 4 | } 5 | 6 | NUM_CHANNELS = 3 7 | NUM_CLASSES = 5 8 | GENERATOR_BATCH_SIZE = 32 9 | TOTAL_EPOCHS = 100 10 | STEPS_PER_EPOCH = 500 11 | VALIDATION_STEPS = 50 12 | BASE_DIR = 'D:\\nswf_model_training_data\\data' -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/generators.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | import constants 4 | 5 | train_datagen = ImageDataGenerator( 6 | rescale=1./255, 7 | rotation_range=30, 8 | width_shift_range=0.2, 9 | height_shift_range=0.2, 10 | shear_range=0.2, 11 | zoom_range=0.2, 12 | channel_shift_range=20, 13 | horizontal_flip=True, 14 | fill_mode='nearest' 15 | ) 16 | 17 | # Validation data should not be modified 18 | validation_datagen = ImageDataGenerator( 19 | rescale=1./255 20 | ) 21 | 22 | train_dir = os.path.join(constants.BASE_DIR, 'train') 23 | test_dir = os.path.join(constants.BASE_DIR, 
'test') 24 | 25 | def create_generators(height, width): 26 | train_generator = train_datagen.flow_from_directory( 27 | train_dir, 28 | target_size=(height, width), 29 | class_mode='categorical', 30 | batch_size=constants.GENERATOR_BATCH_SIZE 31 | ) 32 | 33 | validation_generator = validation_datagen.flow_from_directory( 34 | test_dir, 35 | target_size=(height, width), 36 | class_mode='categorical', 37 | batch_size=constants.GENERATOR_BATCH_SIZE 38 | ) 39 | 40 | return[train_generator, validation_generator] -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/train_fine_tune.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.models import Sequential, Model, load_model 7 | 8 | # reusable stuff 9 | import constants 10 | import callbacks 11 | import generators 12 | 13 | # No kruft plz 14 | clear_session() 15 | import tensorflow as tf 16 | from keras.backend.tensorflow_backend import set_session 17 | config = tf.ConfigProto() 18 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 19 | sess = tf.Session(config=config) 20 | set_session(sess) # set this TensorFlow session as the default session for Keras 21 | 22 | # Config 23 | height = constants.SIZES['basic'] 24 | width = height 25 | weights_file = "weights.best_mobilenet" + str(height) + ".hdf5" 26 | 27 | print ('Starting from last full model run') 28 | model = load_model("nsfwnsfw_mobilenet2." + str(width) + "x" + str(height) + ".h5") 29 | 30 | # Unlock a few layers deep in Mobilenet v2 31 | model.trainable = False 32 | set_trainable = False 33 | for layer in model.layers: 34 | if layer.name == 'block_11_expand': 35 | set_trainable = True 36 | if set_trainable: 37 | layer.trainable = True 38 | else: 39 | layer.trainable = False 40 | 41 | # Let's see it 42 | print('Summary') 43 | print(model.summary()) 44 | 45 | # Load checkpoint if one is found 46 | if os.path.exists(weights_file): 47 | print ("loading ", weights_file) 48 | model.load_weights(weights_file) 49 | 50 | # Get all model callbacks 51 | callbacks_list = callbacks.make_callbacks(weights_file) 52 | 53 | print('Compile model') 54 | opt = SGD(momentum=.9) 55 | model.compile( 56 | loss='categorical_crossentropy', 57 | optimizer=opt, 58 | metrics=['accuracy'] 59 | ) 60 | 61 | # Get training/validation data via generators 62 | train_generator, validation_generator = generators.create_generators(height, width) 63 | 64 | print('Start training!') 65 | history = model.fit_generator( 66 | train_generator, 67 | callbacks=callbacks_list, 68 | epochs=constants.TOTAL_EPOCHS, 69 | steps_per_epoch=constants.STEPS_PER_EPOCH, 70 | shuffle=True, 71 | workers=4, 72 | use_multiprocessing=False, 73 | validation_data=validation_generator, 74 | validation_steps=constants.VALIDATION_STEPS 75 | ) 76 | 77 | # Save it for later 78 | print('Saving Model') 79 | model.save("nsfwnsfw_mobilenet2." 
+ str(width) + "x" + str(height) + ".h5") 80 | -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/train_initialization.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.applications.mobilenet_v2 import MobileNetV2 7 | from keras.models import Sequential, Model, load_model 8 | from keras.layers import Dense, Dropout, Flatten, AveragePooling2D 9 | from keras import initializers, regularizers 10 | 11 | # reusable stuff 12 | import constants 13 | import callbacks 14 | import generators 15 | 16 | # No kruft plz 17 | clear_session() 18 | import tensorflow as tf 19 | from keras.backend.tensorflow_backend import set_session 20 | config = tf.ConfigProto() 21 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 22 | sess = tf.Session(config=config) 23 | set_session(sess) # set this TensorFlow session as the default session for Keras 24 | 25 | # Config 26 | height = constants.SIZES['basic'] 27 | width = height 28 | weights_file = "weights.best_mobilenet" + str(height) + ".hdf5" 29 | 30 | conv_base = MobileNetV2( 31 | weights='imagenet', 32 | include_top=False, 33 | input_shape=(height, width, constants.NUM_CHANNELS) 34 | ) 35 | 36 | # First time run, no unlocking 37 | conv_base.trainable = False 38 | 39 | # Let's see it 40 | print('Summary') 41 | print(conv_base.summary()) 42 | 43 | # Let's construct that top layer replacement 44 | x = conv_base.output 45 | x = AveragePooling2D(pool_size=(7, 7))(x) 46 | x = Flatten()(x) 47 | x = Dense(256, activation='relu', kernel_initializer=initializers.he_normal(seed=None), kernel_regularizer=regularizers.l2(.0005))(x) 48 | x = Dropout(0.5)(x) 49 | # Essential to have another layer for better accuracy 50 | x = Dense(128,activation='relu', kernel_initializer=initializers.he_normal(seed=None))(x) 51 | x = Dropout(0.25)(x) 52 | predictions = Dense(constants.NUM_CLASSES, kernel_initializer="glorot_uniform", activation='softmax')(x) 53 | 54 | print('Stacking New Layers') 55 | model = Model(inputs = conv_base.input, outputs=predictions) 56 | 57 | # Load checkpoint if one is found 58 | if os.path.exists(weights_file): 59 | print ("loading ", weights_file) 60 | model.load_weights(weights_file) 61 | 62 | # Get all model callbacks 63 | callbacks_list = callbacks.make_callbacks(weights_file) 64 | 65 | print('Compile model') 66 | # originally adam, but research says SGD with scheduler 67 | # opt = Adam(lr=0.001, amsgrad=True) 68 | opt = SGD(momentum=.9) 69 | model.compile( 70 | loss='categorical_crossentropy', 71 | optimizer=opt, 72 | metrics=['accuracy'] 73 | ) 74 | 75 | # Get training/validation data via generators 76 | train_generator, validation_generator = generators.create_generators(height, width) 77 | 78 | print('Start training!') 79 | history = model.fit_generator( 80 | train_generator, 81 | callbacks=callbacks_list, 82 | epochs=constants.TOTAL_EPOCHS, 83 | steps_per_epoch=constants.STEPS_PER_EPOCH, 84 | shuffle=True, 85 | workers=4, 86 | use_multiprocessing=False, 87 | validation_data=validation_generator, 88 | validation_steps=constants.VALIDATION_STEPS 89 | ) 90 | 91 | # Save it for later 92 | print('Saving Model') 93 | model.save("nsfw_mobilenet2." 
+ str(width) + "x" + str(height) + ".h5") 94 | -------------------------------------------------------------------------------- /tf1/training/self_clense.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from keras.preprocessing import image 4 | from pathlib import Path 5 | from keras.models import load_model 6 | from sklearn.metrics import confusion_matrix, classification_report 7 | 8 | # Initialize 9 | model = load_model("nsfw.299x299.h5") 10 | image_size = 299 11 | file_count = 0 12 | x_test = [] 13 | y_test = [] 14 | mistakes = [] 15 | categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 16 | base_dir = 'D:\\nswf_model_training_data\\data' 17 | batch_size = 1000 18 | page = 0 19 | 20 | # CONFIGURE EACH RUN 21 | group = 'train' 22 | category_id = 4 23 | mistaken_as = 2 24 | file_type = "jpg" 25 | 26 | 27 | def process_batch(batch_x, batch_y): 28 | print("Batch Check " + str(file_count)) 29 | # Convert the list of images to a numpy array 30 | x_array = np.array(batch_x) 31 | 32 | # Make predictions (arrays of size 5, with probabilities) 33 | predictions = model.predict(x_array) 34 | max_predictions = np.argmax(predictions, axis=1) 35 | 36 | for idx, prediction in enumerate(max_predictions): 37 | if prediction != category_id: 38 | # We have a mistake! Do we log it? 39 | if prediction == mistaken_as: 40 | mistakes.append(batch_y[idx]) 41 | 42 | # Copies categorization failures to the mistakes folder for analysis 43 | def copy_all_failures(): 44 | for file_info in mistakes: 45 | os.rename(file_info["path"], base_dir + "\\" + group + "\\mistakes\\" + str(file_info["filename"])) 46 | 47 | print("Starting Self-clense for " + categories[category_id]) 48 | # Load the data set by looping over every image file in path 49 | for image_file in Path(base_dir + "\\" + group + "\\" + 50 | categories[category_id]).glob("**/*." 
+ file_type): 51 | file_info = {"path": image_file, "filename": os.path.basename(image_file)} 52 | 53 | top = (page + 1) * batch_size 54 | file_count += 1 55 | 56 | # Load the current image file 57 | image_data = image.load_img(image_file, target_size=(image_size, image_size)) 58 | 59 | # Convert the loaded image file to a numpy array 60 | image_array = image.img_to_array(image_data) 61 | image_array /= 255 62 | 63 | # Add the current image to our list of test images 64 | x_test.append(image_array) 65 | # To identify failed predictions 66 | y_test.append(file_info) 67 | 68 | # Kick off a processing to clear RAM 69 | if file_count == top: 70 | process_batch(x_test, y_test) 71 | # move next batch moment 72 | page += 1 73 | # reset in-memory 74 | x_test = [] 75 | y_test = [] 76 | 77 | process_batch(x_test, y_test) 78 | copy_all_failures() 79 | print('Out of ' + str(file_count) + ' images of "' + str(categories[category_id]) + '" ' + str(len(mistakes)) + ' are mistaken as "' + str(categories[mistaken_as]) + '"') -------------------------------------------------------------------------------- /tf1/training/visuals.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import matplotlib.pyplot as plt 3 | import os 4 | import numpy as np 5 | from keras.preprocessing import image 6 | from pathlib import Path 7 | from keras.models import load_model 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | 10 | model = load_model("nsfw.299x299.h5") 11 | test_dir = 'D:\\nswf_model_training_data\\data\\test' 12 | image_size = 299 13 | x_test = [] 14 | y_test = [] 15 | file_count = 0 16 | update_frequency = 1000 17 | 18 | class_names = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 19 | 20 | for image_file in Path(test_dir).glob("**/*.jpg"): 21 | file_count += 1 22 | # Load the current image file 23 | image_data = image.load_img(image_file, target_size=(image_size, image_size)) 24 | 25 | # Convert the loaded image file to a numpy array 26 | image_array = image.img_to_array(image_data) 27 | image_array /= 255 28 | 29 | # Add to list of test images 30 | x_test.append(image_array) 31 | # Now add answer derived from folder 32 | path_name = os.path.dirname(image_file) 33 | folder_name = os.path.basename(path_name) 34 | y_test.append(class_names.index(folder_name)) 35 | 36 | if file_count % update_frequency == 0: 37 | print("Processed " + str(file_count) + " - Current Folder: " + folder_name) 38 | 39 | 40 | def plot_confusion_matrix(cm, classes, 41 | normalize=False, 42 | title='Confusion matrix', 43 | cmap=plt.cm.get_cmap('Blues')): 44 | """ 45 | This function prints and plots the confusion matrix. 46 | Normalization can be applied by setting `normalize=True`. 47 | """ 48 | if normalize: 49 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 50 | print("Normalized confusion matrix") 51 | else: 52 | print('Confusion matrix, without normalization') 53 | 54 | print(cm) 55 | 56 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 57 | plt.title(title) 58 | plt.colorbar() 59 | tick_marks = np.arange(len(classes)) 60 | plt.xticks(tick_marks, classes, rotation=45) 61 | plt.yticks(tick_marks, classes) 62 | 63 | fmt = '.2f' if normalize else 'd' 64 | thresh = cm.max() / 2. 
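    # Annotate each cell with its count (or fraction, when normalized),
    # flipping the text color for contrast against the colormap.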
65 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 66 | plt.text(j, i, format(cm[i, j], fmt), 67 | horizontalalignment="center", 68 | color="white" if cm[i, j] > thresh else "black") 69 | 70 | plt.ylabel('True label') 71 | plt.xlabel('Predicted label') 72 | plt.tight_layout() 73 | 74 | x_test = np.array(x_test) 75 | predictions = model.predict(x_test) 76 | y_pred = np.argmax(predictions, axis=1) 77 | 78 | # Compute confusion matrix 79 | cnf_matrix = confusion_matrix(y_test, y_pred) 80 | np.set_printoptions(precision=2) 81 | 82 | # Plot normalized confusion matrix 83 | plt.figure() 84 | plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, 85 | title='Normalized confusion matrix') 86 | 87 | plt.show() -------------------------------------------------------------------------------- /training/make_nsfw_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Trains a TensorFlow model based on directories of images. 16 | 17 | This program builds, trains and exports a TensorFlow 2.x model that classifies 18 | natural images (photos) into a fixed set of classes. The classes are learned 19 | from a user-supplied dataset of images, stored as a directory of subdirectories 20 | of JPEG images, each subdirectory representing one class. 21 | 22 | The model is built from a pre-trained image feature vector module from 23 | TensorFlow Hub (in its TF2/SavedModel format, not the older hub.Module format) 24 | followed by a linear classifier. The linear classifier, and optionally also 25 | the TF Hub module, are trained on the new dataset. TF Hub offers a variety of 26 | suitable modules with various size/accuracy tradeoffs. 27 | 28 | The resulting model can be exported in TensorFlow's standard SavedModel format 29 | and as a .tflite file for deployment to mobile devices with TensorFlow Lite. 30 | TODO(b/139467904): Add support for post-training model optimization. 31 | 32 | For more information, please see the README file next to the source code, 33 | https://github.com/tensorflow/hub/blob/master/tensorflow_hub/tools/make_image_classifier/README.md 34 | """ 35 | 36 | # NOTE: This is an expanded, command-line version of 37 | # https://github.com/tensorflow/hub/blob/master/examples/colab/tf2_image_retraining.ipynb 38 | # PLEASE KEEP THEM IN SYNC, such that running tests for this program 39 | # provides assurance that the code in the colab notebook works. 
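# Example invocation (hypothetical paths; all flags are defined below):
#   python make_nsfw_model.py --image_dir images --image_size 224 \
#       --saved_model_dir trained_models/saved_model --labels_output_file labels.txt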
40 | 41 | from __future__ import absolute_import 42 | from __future__ import division 43 | from __future__ import print_function 44 | from __future__ import unicode_literals 45 | from pathlib import Path 46 | from absl import app 47 | from absl import flags 48 | from absl import logging 49 | from tensorflow import keras 50 | from tensorflow.core.framework import attr_value_pb2 51 | from tensorflow.core.framework import graph_pb2 52 | from tensorflow.core.framework import node_def_pb2 53 | from tensorflow.keras import layers 54 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 55 | from tensorflow.python.framework import dtypes 56 | from tensorflow.python.framework import ops 57 | from tensorflow.python.framework import tensor_util 58 | from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 59 | from tensorflow.python.platform import tf_logging as logging 60 | from tensorflow.python.util import deprecation 61 | from tensorflow.python.util.tf_export import tf_export 62 | import collections 63 | import copy 64 | import make_nsfw_model_lib as lib 65 | import numpy as np 66 | import os 67 | import re 68 | import six 69 | import tempfile 70 | import tensorflow as tf 71 | import tensorflow.keras.backend as K 72 | import tensorflow_hub as hub 73 | 74 | _DEFAULT_HPARAMS = lib.get_default_hparams() 75 | 76 | flags.DEFINE_string( 77 | "image_dir", None, 78 | "A directory with subdirectories of images, one per class. " 79 | "If unset, the TensorFlow Flowers example dataset will be used. " 80 | "Internally, the dataset is split into training and validation pieces.") 81 | flags.DEFINE_string( 82 | "tfhub_module", 83 | "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4", 84 | "Which TF Hub module to use. Must be a module in TF2/SavedModel format " 85 | "for computing image feature vectors.") 86 | flags.DEFINE_integer( 87 | "image_size", None, 88 | "The height and width of images to feed into --tfhub_module. " 89 | "(For now, must be set manually for modules with variable input size.)") 90 | flags.DEFINE_string( 91 | "saved_model_dir", None, 92 | "The final model is exported as a SavedModel directory with this name.") 93 | flags.DEFINE_string( 94 | "tflite_output_file", None, 95 | "The final model is exported as a .tflite flatbuffers file with this name.") 96 | flags.DEFINE_string( 97 | "labels_output_file", None, 98 | "Where to save the labels (that is, names of image subdirectories). " 99 | "The lines in this file appear in the same order as the predictions " 100 | "of the model.") 101 | flags.DEFINE_float( 102 | "assert_accuracy_at_least", None, 103 | "If set, the program fails if the validation accuracy at the end of " 104 | "training is less than this number (between 0 and 1), and no export of " 105 | "the trained model happens.") 106 | flags.DEFINE_integer( 107 | "train_epochs", _DEFAULT_HPARAMS.train_epochs, 108 | "Training will do this many iterations over the dataset.") 109 | flags.DEFINE_bool( 110 | "do_fine_tuning", _DEFAULT_HPARAMS.do_fine_tuning, 111 | "If set, the --tfhub_module is trained together with the rest of " 112 | "the model being built.") 113 | flags.DEFINE_integer( 114 | "batch_size", _DEFAULT_HPARAMS.batch_size, 115 | "Each training step samples a batch of this many images " 116 | "from the training data. (You may need to shrink this when using a GPU " 117 | "and getting out-of-memory errors. 
Avoid values below 8 when re-training " 118 | "modules that use batch normalization.)") 119 | flags.DEFINE_float( 120 | "learning_rate", _DEFAULT_HPARAMS.learning_rate, 121 | "The learning rate to use for gradient descent training.") 122 | flags.DEFINE_float( 123 | "momentum", _DEFAULT_HPARAMS.momentum, 124 | "The momentum parameter to use for gradient descent training.") 125 | flags.DEFINE_float( 126 | "dropout_rate", _DEFAULT_HPARAMS.dropout_rate, 127 | "The fraction of the input units to drop, used in dropout layer.") 128 | flags.DEFINE_bool( 129 | "is_deprecated_tfhub_module", False, 130 | "Whether or not the supplied TF Hub module is an older module from TensorFlow 1.") 131 | flags.DEFINE_float( 132 | "label_smoothing", _DEFAULT_HPARAMS.label_smoothing, 133 | "The degree of label smoothing to use.") 134 | flags.DEFINE_float( 135 | "validation_split", _DEFAULT_HPARAMS.validation_split, 136 | "The fraction of data to use for validation.") 137 | flags.DEFINE_string( 138 | 'optimizer', _DEFAULT_HPARAMS.optimizer, 139 | 'The name of the optimizer, one of "adadelta", "adagrad", "adam",' 140 | '"ftrl", "sgd" or "rmsprop".') 141 | flags.DEFINE_float( 142 | 'adadelta_rho', _DEFAULT_HPARAMS.adadelta_rho, 143 | 'The decay rate for adadelta.') 144 | flags.DEFINE_float( 145 | 'adagrad_initial_accumulator_value', _DEFAULT_HPARAMS.adagrad_initial_accumulator_value, 146 | 'Starting value for the AdaGrad accumulators.') 147 | flags.DEFINE_float( 148 | 'adam_beta1', _DEFAULT_HPARAMS.adam_beta1, 149 | 'The exponential decay rate for the 1st moment estimates.') 150 | flags.DEFINE_float( 151 | 'adam_beta2', _DEFAULT_HPARAMS.adam_beta2, 152 | 'The exponential decay rate for the 2nd moment estimates.') 153 | flags.DEFINE_float('opt_epsilon', _DEFAULT_HPARAMS.opt_epsilon, 'Epsilon term for the optimizer.') 154 | flags.DEFINE_float('ftrl_learning_rate_power', _DEFAULT_HPARAMS.ftrl_learning_rate_power, 155 | 'The learning rate power.') 156 | flags.DEFINE_float( 157 | 'ftrl_initial_accumulator_value', _DEFAULT_HPARAMS.ftrl_initial_accumulator_value, 158 | 'Starting value for the FTRL accumulators.') 159 | flags.DEFINE_float( 160 | 'ftrl_l1', _DEFAULT_HPARAMS.ftrl_l1, 'The FTRL l1 regularization strength.') 161 | 162 | flags.DEFINE_float( 163 | 'ftrl_l2', _DEFAULT_HPARAMS.ftrl_l2, 'The FTRL l2 regularization strength.') 164 | flags.DEFINE_float('rmsprop_momentum', _DEFAULT_HPARAMS.rmsprop_momentum, 'Momentum.') 165 | flags.DEFINE_float('rmsprop_decay', _DEFAULT_HPARAMS.rmsprop_decay, 'Decay term for RMSProp.') 166 | flags.DEFINE_bool( 167 | "do_data_augmentation", False, 168 | "Whether or not to do data augmentation.") 169 | flags.DEFINE_bool( 170 | "use_mixed_precision", False, 171 | "Whether or not to use NVIDIA mixed precision. 
Requires an NVIDIA card with at least compute capability 7.0.") 172 | 173 | FLAGS = flags.FLAGS 174 | 175 | 176 | def _get_hparams_from_flags(): 177 | """Creates an HParams object from the parsed flags.""" 178 | return lib.HParams( 179 | train_epochs=FLAGS.train_epochs, 180 | do_fine_tuning=FLAGS.do_fine_tuning, 181 | batch_size=FLAGS.batch_size, 182 | learning_rate=FLAGS.learning_rate, 183 | momentum=FLAGS.momentum, 184 | dropout_rate=FLAGS.dropout_rate, 185 | label_smoothing=FLAGS.label_smoothing, 186 | validation_split=FLAGS.validation_split, 187 | optimizer=FLAGS.optimizer, 188 | adadelta_rho=FLAGS.adadelta_rho, 189 | adagrad_initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value, 190 | adam_beta1=FLAGS.adam_beta1, 191 | adam_beta2=FLAGS.adam_beta2, 192 | opt_epsilon=FLAGS.opt_epsilon, 193 | ftrl_learning_rate_power=FLAGS.ftrl_learning_rate_power, 194 | ftrl_initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 195 | ftrl_l1=FLAGS.ftrl_l1, 196 | ftrl_l2=FLAGS.ftrl_l2, 197 | rmsprop_momentum=FLAGS.rmsprop_momentum, 198 | rmsprop_decay=FLAGS.rmsprop_decay, 199 | do_data_augmentation=FLAGS.do_data_augmentation, 200 | use_mixed_precision=FLAGS.use_mixed_precision 201 | ) 202 | 203 | 204 | 205 | 206 | def _check_keras_dependencies(): 207 | """Checks dependencies of tf.keras.preprocessing.image are present. 208 | 209 | This function may come to depend on flag values that determine the kind 210 | of preprocessing being done. 211 | 212 | Raises: 213 | ImportError: If dependencies are missing. 214 | """ 215 | try: 216 | tf.keras.preprocessing.image.load_img(six.BytesIO()) 217 | except ImportError: 218 | print("\n*** Unsatisfied dependencies of keras_preprocessing.image. ***\n" 219 | "To install them, use your system's equivalent of\n" 220 | "pip install tensorflow_hub[make_image_classifier]\n") 221 | raise 222 | except Exception as e: # pylint: disable=broad-except 223 | # Loading from dummy content as above is expected to fail in other ways. 224 | pass 225 | 226 | 227 | def _assert_accuracy(train_result, assert_accuracy_at_least): 228 | # Fun fact: With TF1 behavior, the key was called "val_acc". 229 | val_accuracy = train_result.history["val_accuracy"][-1] 230 | accuracy_message = "found {:f}, expected at least {:f}".format( 231 | val_accuracy, assert_accuracy_at_least) 232 | if val_accuracy >= assert_accuracy_at_least: 233 | print("ACCURACY PASSED:", accuracy_message) 234 | else: 235 | raise AssertionError("ACCURACY FAILED:", accuracy_message) 236 | 237 | def main(args): 238 | """Main function to be called by absl.app.run() after flag parsing.""" 239 | del args 240 | 241 | #policy = mixed_precision.Policy('mixed_float16') 242 | #mixed_precision.set_policy(policy) 243 | 244 | #tf.config.gpu.set_per_process_memory_fraction(0.75) 245 | #tf.config.gpu.set_per_process_memory_growth(False) 246 | physical_devices = tf.config.list_physical_devices('GPU') 247 | try: 248 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 249 | print('Configured device') 250 | except Exception: 251 | # Invalid device or cannot modify virtual devices once initialized.
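# (Memory growth must be configured before the GPU is first initialized, and physical_devices may be empty on CPU-only machines, so a failure here is expected in some environments and safe to ignore.)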
252 | pass 253 | 254 | _check_keras_dependencies() 255 | hparams = _get_hparams_from_flags() 256 | 257 | image_dir = FLAGS.image_dir or lib.get_default_image_dir() 258 | 259 | model, labels, train_result, frozen_graph = lib.make_image_classifier( 260 | FLAGS.tfhub_module, image_dir, hparams, FLAGS.image_size, FLAGS.saved_model_dir) 261 | if FLAGS.assert_accuracy_at_least: 262 | _assert_accuracy(train_result, FLAGS.assert_accuracy_at_least) 263 | print("Done with training.") 264 | 265 | if FLAGS.labels_output_file: 266 | labels_dir_path = os.path.dirname(FLAGS.labels_output_file) 267 | # Ensure dir structure exists 268 | Path(labels_dir_path).mkdir(parents=True, exist_ok=True) 269 | with tf.io.gfile.GFile(FLAGS.labels_output_file, "w") as f: 270 | f.write("\n".join(labels + ("",))) 271 | print("Labels written to", FLAGS.labels_output_file) 272 | 273 | saved_model_dir = FLAGS.saved_model_dir 274 | 275 | if FLAGS.tflite_output_file and not saved_model_dir: 276 | # We need a SavedModel for conversion, even if the user did not request it. 277 | saved_model_dir = tempfile.mkdtemp() 278 | 279 | if saved_model_dir: 280 | # Ensure dir structure exists 281 | Path(saved_model_dir).mkdir(parents=True, exist_ok=True) 282 | tf.saved_model.save(model, saved_model_dir) 283 | keras_model_path = os.path.join(saved_model_dir, "saved_model.h5") 284 | weights_path = os.path.join(saved_model_dir, "saved_model_weights.h5") 285 | model.save(keras_model_path) 286 | model.save_weights(weights_path) 287 | print("SavedModel model exported to", saved_model_dir) 288 | 289 | if FLAGS.tflite_output_file: 290 | tflite_dir_path = os.path.dirname(FLAGS.tflite_output_file) 291 | # Ensure dir structure exists 292 | Path(tflite_dir_path).mkdir(parents=True, exist_ok=True) 293 | converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) 294 | lite_model_content = converter.convert() 295 | with tf.io.gfile.GFile(FLAGS.tflite_output_file, "wb") as f: 296 | f.write(lite_model_content) 297 | print("TFLite model exported to", FLAGS.tflite_output_file) 298 | 299 | if saved_model_dir: 300 | # Save the frozen graph 301 | # Ensure dir structure exists 302 | Path(saved_model_dir).mkdir(parents=True, exist_ok=True) 303 | tf.io.write_graph(graph_or_graph_def=frozen_graph, 304 | logdir=saved_model_dir, 305 | name="frozen_graph.pb", 306 | as_text=False) 307 | 308 | 309 | def _ensure_tf2(): 310 | """Ensure running with TensorFlow 2 behavior. 311 | 312 | This function is safe to call even before flags have been parsed. 313 | 314 | Raises: 315 | ImportError: If tensorflow is too old for proper TF2 behavior. 316 | """ 317 | logging.info("Running with tensorflow %s (git version %s) and hub %s", 318 | tf.__version__, tf.__git_version__, hub.__version__) 319 | if tf.__version__.startswith("1."): 320 | if tf.__git_version__ == "unknown": # For internal testing use. 321 | try: 322 | tf.compat.v1.enable_v2_behavior() 323 | return 324 | except AttributeError: 325 | pass # Fail below for missing enabler function. 326 | raise ImportError("Sorry, this program needs TensorFlow 2.") 327 | 328 | 329 | def run_main(): 330 | """Entry point equivalent to executing this file.""" 331 | _ensure_tf2() 332 | app.run(main) 333 | 334 | 335 | if __name__ == "__main__": 336 | run_main() 337 | -------------------------------------------------------------------------------- /training/make_nsfw_model_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Trains a TensorFlow model based on directories of images. 16 | 17 | This library provides the major pieces for make_image_classifier (see there). 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | import multiprocessing 25 | from pathlib import Path 26 | from absl import app 27 | from absl import flags 28 | from absl import logging 29 | from tensorflow import keras 30 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 31 | from tensorflow.core.framework import attr_value_pb2 32 | from tensorflow.core.framework import graph_pb2 33 | from tensorflow.core.framework import node_def_pb2 34 | from tensorflow.keras import layers 35 | from tensorflow.python.framework import dtypes 36 | from tensorflow.python.framework import ops 37 | from tensorflow.python.framework import tensor_util 38 | from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 39 | from tensorflow.python.platform import tf_logging as logging 40 | from tensorflow.python.util import deprecation 41 | from tensorflow.python.util.tf_export import tf_export 42 | import collections 43 | import copy 44 | import numpy as np 45 | import os 46 | import re 47 | import six 48 | import tempfile 49 | import tensorflow as tf 50 | import tensorflow.keras.backend as K 51 | import tensorflow_hub as hub 52 | 53 | 54 | _DEFAULT_IMAGE_URL = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" 55 | 56 | # From https://github.com/tensorflow/hub/issues/390#issuecomment-544489095 57 | # Woops, this doesn't actually work. Sad face emoji. 58 | class Wrapper(tf.train.Checkpoint): 59 | def __init__(self, spec): 60 | super(Wrapper, self).__init__() 61 | self.module = hub.load(spec, tags=[]) 62 | self.variables = self.module.variables 63 | self.trainable_variables = [] 64 | def __call__(self, x): 65 | return self.module.signatures["default"](x)["default"] 66 | 67 | def get_default_image_dir(): 68 | """Returns the path to a default image dataset, downloading it if needed.""" 69 | return tf.keras.utils.get_file("flower_photos", 70 | _DEFAULT_IMAGE_URL, untar=True) 71 | 72 | def configure_optimizer(hparams): 73 | """Configures the optimizer used for training. 74 | 75 | Args: 76 | hparams: An HParams namedtuple; its optimizer-related fields select and configure the optimizer. 77 | 78 | Returns: 79 | An instance of an optimizer. 80 | 81 | Raises: 82 | ValueError: if hparams.optimizer is not recognized.
83 | """ 84 | if hparams.optimizer == 'adadelta': 85 | optimizer = tf.keras.optimizers.Adadelta( 86 | hparams.learning_rate, 87 | rho=hparams.adadelta_rho, 88 | epsilon=hparams.opt_epsilon) 89 | elif hparams.optimizer == 'adagrad': 90 | optimizer = tf.keras.optimizers.Adagrad( 91 | hparams.learning_rate, 92 | initial_accumulator_value=hparams.adagrad_initial_accumulator_value) 93 | elif hparams.optimizer == 'adam': 94 | optimizer = tf.keras.optimizers.Adam( 95 | hparams.learning_rate, 96 | beta_1=hparams.adam_beta1, 97 | beta_2=hparams.adam_beta2, 98 | epsilon=hparams.opt_epsilon) 99 | elif hparams.optimizer == 'ftrl': 100 | optimizer = tf.keras.optimizers.Ftrl( 101 | hparams.learning_rate, 102 | learning_rate_power=hparams.ftrl_learning_rate_power, 103 | initial_accumulator_value=hparams.ftrl_initial_accumulator_value, 104 | l1_regularization_strength=hparams.ftrl_l1, 105 | l2_regularization_strength=hparams.ftrl_l2) 106 | elif hparams.optimizer == 'rmsprop': 107 | optimizer = tf.keras.optimizers.RMSprop(learning_rate=hparams.learning_rate, epsilon=hparams.opt_epsilon, momentum=hparams.rmsprop_momentum) 108 | elif hparams.optimizer == 'sgd': 109 | optimizer = tf.keras.optimizers.SGD(learning_rate=hparams.learning_rate, momentum=hparams.momentum) 110 | else: 111 | raise ValueError('Optimizer [%s] was not recognized' % hparams.optimizer) 112 | return optimizer 113 | 114 | 115 | class HParams( 116 | collections.namedtuple("HParams", [ 117 | "train_epochs", "do_fine_tuning", "batch_size", "learning_rate", 118 | "momentum", "dropout_rate", "label_smoothing", "validation_split", 119 | "optimizer", "adadelta_rho", "adagrad_initial_accumulator_value", 120 | "adam_beta1", "adam_beta2", "opt_epsilon", "ftrl_learning_rate_power", 121 | "ftrl_initial_accumulator_value", "ftrl_l1", "ftrl_l2", "rmsprop_momentum", 122 | "rmsprop_decay", "do_data_augmentation", "use_mixed_precision" 123 | ])): 124 | """The hyperparameters for make_image_classifier. 125 | 126 | train_epochs: Training will do this many iterations over the dataset. 127 | do_fine_tuning: If true, the Hub module is trained together with the 128 | classification layer on top. 129 | batch_size: Each training step samples a batch of this many images. 130 | learning_rate: The learning rate to use for gradient descent training. 131 | momentum: The momentum parameter to use for gradient descent training. 132 | dropout_rate: The fraction of the input units to drop, used in dropout layer. 133 | """ 134 | 135 | 136 | def get_default_hparams(): 137 | """Returns a fresh HParams object initialized to default values.""" 138 | return HParams( 139 | train_epochs=5, 140 | do_fine_tuning=False, 141 | batch_size=32, 142 | learning_rate=0.005, 143 | momentum=0.9, 144 | dropout_rate=0.2, 145 | label_smoothing=0.1, 146 | validation_split=.20, 147 | optimizer='rmsprop', 148 | adadelta_rho=0.95, 149 | adagrad_initial_accumulator_value=0.1, 150 | adam_beta1=0.9, 151 | adam_beta2=0.999, 152 | opt_epsilon=1.0, 153 | ftrl_learning_rate_power=-0.5, 154 | ftrl_initial_accumulator_value=0.1, 155 | ftrl_l1=0.0, 156 | ftrl_l2=0.0, 157 | rmsprop_momentum=0.9, 158 | rmsprop_decay=0.9, 159 | do_data_augmentation=False, 160 | use_mixed_precision=False 161 | ) 162 | 163 | 164 | def _get_data_with_keras(image_dir, image_size, batch_size, 165 | validation_size=0.2, do_data_augmentation=False): 166 | """Gets training and validation data via keras_preprocessing. 
167 | 168 | Args: 169 | image_dir: A Python string with the name of a directory that contains 170 | subdirectories of images, one per class. 171 | image_size: A list or tuple with 2 Python integers specifying 172 | the fixed height and width to which input images are resized. 173 | batch_size: A Python integer with the number of images per batch. 174 | validation_size: A float with the fraction of the data to reserve for validation. 175 | do_data_augmentation: An optional boolean, controlling whether the 176 | training dataset is augmented by randomly distorting input images. 177 | 178 | Returns: 179 | A nested tuple ((train_data, train_size), 180 | (valid_data, valid_size), labels) where: 181 | train_data, valid_data: Generators for use with Model.fit_generator, 182 | each yielding tuples (images, labels) where 183 | images is a float32 Tensor of shape [batch_size, height, width, 3] 184 | with pixel values in range [0,1], 185 | labels is a float32 Tensor of shape [batch_size, num_classes] 186 | with one-hot encoded classes. 187 | train_size, valid_size: Python integers with the numbers of training 188 | and validation examples, respectively. 189 | labels: A tuple of strings with the class labels (subdirectory names). 190 | The index of a label in this tuple is the numeric class id. 191 | """ 192 | datagen_kwargs = dict(rescale=1./255, 193 | # TODO(b/139467904): Expose this as a flag. 194 | validation_split=validation_size) 195 | dataflow_kwargs = dict(target_size=image_size, batch_size=batch_size, 196 | interpolation="bilinear") 197 | 198 | valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator( 199 | **datagen_kwargs) 200 | valid_generator = valid_datagen.flow_from_directory( 201 | image_dir, subset="validation", shuffle=False, **dataflow_kwargs) 202 | 203 | if do_data_augmentation: 204 | # TODO(b/139467904): Expose the following constants as flags. 205 | train_datagen = tf.keras.preprocessing.image.ImageDataGenerator( 206 | rotation_range=40, horizontal_flip=True, width_shift_range=0.2, 207 | height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, 208 | **datagen_kwargs) 209 | else: 210 | train_datagen = valid_datagen 211 | 212 | train_generator = train_datagen.flow_from_directory( 213 | image_dir, subset="training", shuffle=True, **dataflow_kwargs) 214 | 215 | indexed_labels = [(index, label) 216 | for label, index in train_generator.class_indices.items()] 217 | sorted_indices, sorted_labels = zip(*sorted(indexed_labels)) 218 | assert sorted_indices == tuple(range(len(sorted_labels))) 219 | return ((train_generator, train_generator.samples), 220 | (valid_generator, valid_generator.samples), 221 | sorted_labels) 222 | 223 | 224 | def _image_size_for_module(module_layer, requested_image_size=None): 225 | """Returns the input image size to use with the given module. 226 | 227 | Args: 228 | module_layer: A hub.KerasLayer initialized from a Hub module expecting 229 | image input. 230 | requested_image_size: An optional Python integer with the user-requested 231 | height and width of the input image; or None. 232 | 233 | Returns: 234 | A tuple (height, width) of Python integers that can be used as input 235 | image size for the given module_layer. 236 | 237 | Raises: 238 | ValueError: If requested_image_size is set but incompatible with the module. 239 | ValueError: If the module does not specify a particular input size and 240 | requested_image_size is not set. 241 | """ 242 | # TODO(b/139530454): Use a library helper function once available.
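# As a concrete illustration (an assumed value, not guaranteed by this code): for the default mobilenet_v2 feature-vector module, the introspection below yields module_image_size == (224, 224), in which case requested_image_size may simply be omitted.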
243 | # The stop-gap code below assumes any concrete function backing the 244 | # module call will accept a batch of images with the one accepted size. 245 | 246 | module_image_size = tuple( 247 | module_layer._func.__call__ # pylint:disable=protected-access 248 | .concrete_functions[0].structured_input_signature[0][0].shape[1:3]) 249 | 250 | if requested_image_size is None: 251 | if None in module_image_size: 252 | raise ValueError("Must specify an image size because " 253 | "the selected TF Hub module specifies none.") 254 | else: 255 | return module_image_size 256 | else: 257 | requested_image_size = tf.TensorShape([requested_image_size, requested_image_size]) 258 | assert requested_image_size.is_fully_defined() 259 | 260 | if requested_image_size.is_compatible_with(module_image_size): 261 | return tuple(requested_image_size.as_list()) 262 | else: 263 | raise ValueError("The selected TF Hub module expects image size {}, " 264 | "but size {} is requested".format( 265 | module_image_size, 266 | tuple(requested_image_size.as_list()))) 267 | 268 | 269 | def build_model(module_layer, hparams, image_size, num_classes): 270 | """Builds the full classifier model from the given module_layer. 271 | 272 | Args: 273 | module_layer: Pre-trained tfhub model layer. 274 | hparams: A namedtuple of hyperparameters. This function expects 275 | .dropout_rate: The fraction of the input units to drop, used in dropout 276 | layer. 277 | image_size: The input image size to use with the given module layer. 278 | num_classes: Number of the classes to be predicted. 279 | 280 | Returns: 281 | The full classifier model. 282 | """ 283 | # TODO(b/139467904): Expose the hyperparameters below as flags. 284 | 285 | if hparams.dropout_rate is not None and hparams.dropout_rate > 0: 286 | model = tf.keras.Sequential([ 287 | tf.keras.Input(shape=(image_size[0], image_size[1], 3), name='input', dtype='float32'), 288 | module_layer, 289 | tf.keras.layers.Dropout(rate=hparams.dropout_rate), 290 | tf.keras.layers.Dense( 291 | num_classes, 292 | kernel_regularizer=tf.keras.regularizers.l2(0.0001)), 293 | tf.keras.layers.Activation('softmax', dtype='float32', name='prediction') 294 | ]) 295 | else: 296 | model = tf.keras.Sequential([ 297 | tf.keras.Input(shape=(image_size[0], image_size[1], 3), name='input', dtype='float32'), 298 | module_layer, 299 | tf.keras.layers.Dense( 300 | num_classes, 301 | kernel_regularizer=None), 302 | tf.keras.layers.Activation('softmax', dtype='float32', name='prediction') 303 | ]) 304 | 305 | model.summary() # summary() prints directly; wrapping it in print() would also print "None" 306 | return model 307 | 308 | 309 | def train_model(model, hparams, train_data_and_size, valid_data_and_size): 310 | """Trains model with the given data and hyperparameters. 311 | 312 | Args: 313 | model: The tf.keras.Model from _build_model(). 314 | hparams: A namedtuple of hyperparameters. This function expects 315 | .train_epochs: a Python integer with the number of passes over the 316 | training dataset; 317 | .learning_rate: a Python float forwarded to the optimizer; 318 | .momentum: a Python float forwarded to the optimizer; 319 | .batch_size: a Python integer, the number of examples returned by each 320 | call to the generators. 321 | train_data_and_size: A (data, size) tuple in which data is training data to 322 | be fed in tf.keras.Model.fit(), size is a Python integer with the 323 | number of training examples. 
324 | valid_data_and_size: A (data, size) tuple in which data is validation data 325 | to be fed in tf.keras.Model.fit(), size is a Python integer with the 326 | number of validation examples. 327 | 328 | Returns: 329 | The tf.keras.callbacks.History object returned by tf.keras.Model.fit(). 330 | """ 331 | 332 | earlystop_callback = tf.keras.callbacks.EarlyStopping( 333 | monitor='val_accuracy', min_delta=0.0001, 334 | patience=1) 335 | 336 | train_data, train_size = train_data_and_size 337 | valid_data, valid_size = valid_data_and_size 338 | # TODO(b/139467904): Expose this hyperparameter as a flag. 339 | loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=hparams.label_smoothing) 340 | 341 | if hparams.use_mixed_precision is True: 342 | optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(configure_optimizer(hparams)) 343 | else: 344 | optimizer = configure_optimizer(hparams) 345 | 346 | model.compile( 347 | optimizer=optimizer, 348 | loss=loss, 349 | metrics=["accuracy"]) 350 | steps_per_epoch = train_size // hparams.batch_size 351 | validation_steps = valid_size // hparams.batch_size 352 | return model.fit( 353 | train_data, 354 | use_multiprocessing=False, 355 | workers=multiprocessing.cpu_count() - 1, 356 | epochs=hparams.train_epochs, 357 | callbacks=[earlystop_callback], 358 | steps_per_epoch=steps_per_epoch, 359 | validation_data=valid_data, 360 | validation_steps=validation_steps) 361 | 362 | def model_to_frozen_graph(model): 363 | 364 | # Convert the Keras model to a ConcreteFunction. 365 | # In the resulting graph, "self" will be the input node 366 | # and the very last softmax layer in the graph will be the 367 | # output prediction node. 368 | 369 | full_model = tf.function(model) 370 | full_model = full_model.get_concrete_function( 371 | tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)) 372 | 373 | # Get frozen ConcreteFunction 374 | frozen_func = convert_variables_to_constants_v2(full_model) 375 | input_graph = frozen_func.graph.as_graph_def() 376 | 377 | types_to_remove = {"CheckNumerics": True, "Identity": True} 378 | 379 | input_nodes = input_graph.node 380 | names_to_remove = {} 381 | 382 | # We're going to clean up some junk nodes that we do not 383 | # need outside of training. I assume these are inherited 384 | # from tensorflow hub. 385 | for node in input_nodes: 386 | if '/input_control_node/_'.upper() in node.name.upper(): 387 | names_to_remove[node.name] = True 388 | 389 | if '/output_control_node/_'.upper() in node.name.upper(): 390 | names_to_remove[node.name] = True 391 | 392 | # What we're doing here is double-iterating over the graph nodes 393 | # looking for disconnected/orphaned nodes. Any node whose name 394 | # cannot be found inside the inputs of another node is considered 395 | # trash that caused me pain and suffering for two days, so they're 396 | # going to be deleted. 397 | # 398 | # On a serious note, these are leftover junk (I assume) from the 399 | # tensorflow hub input that is not needed outside of training. 400 | for node in input_nodes: 401 | noOutput = True 402 | for inner in input_nodes: 403 | resa = [i for i in inner.input if node.name.upper() in i.upper()] 404 | if len(resa) > 0: 405 | noOutput = False 406 | 407 | if noOutput is True: 408 | names_to_remove[node.name] = True 409 | 410 | # We're going to look for junk nodes (used only in training) that are connected 411 | # to our output Softmax layer and mark those for deletion as well. 
412 | for node in input_nodes: 413 | if node.op in types_to_remove: 414 | 415 | # Find all nodes of type Identity that are connected to a Softmax (our output) 416 | found = [i for i in node.input if 'softmax'.upper() in i.upper()] 417 | 418 | if found is not None and len(found) > 0: 419 | names_to_remove[node.name] = True 420 | 421 | # The rest of this code is basically a straight-copy-and-paste from 422 | # the remove_nodes function of TF1. 423 | nodes_after_removal = [] 424 | for node in input_nodes: 425 | if node.name in names_to_remove: 426 | continue 427 | new_node = node_def_pb2.NodeDef() 428 | new_node.CopyFrom(node) 429 | input_before_removal = node.input 430 | del new_node.input[:] 431 | for full_input_name in input_before_removal: 432 | input_name = re.sub(r"^\^", "", full_input_name) 433 | if input_name in names_to_remove: 434 | continue 435 | new_node.input.append(full_input_name) 436 | nodes_after_removal.append(new_node) 437 | 438 | # TODO - We may be able to just delete all of this code here, as it 439 | # was unused by me and I was able to get a functional output. 440 | # When this TODO is tackled, just delete everything that has to do 441 | # with node splicing. In the final output, these nodes become either 442 | # Const or NoOp nodes anyway so they're junk, but harmless junk. 443 | types_to_splice = {"Identityzzz": True} 444 | control_input_names = set() 445 | node_names_with_control_input = set() 446 | for node in nodes_after_removal: 447 | for node_input in node.input: 448 | if "^" in node_input: 449 | control_input_names.add(node_input.replace("^", "")) 450 | node_names_with_control_input.add(node.name) 451 | protected_nodes = set()  # defined here so the splice check below cannot hit a NameError 452 | names_to_splice = {} 453 | for node in nodes_after_removal: 454 | if node.op in types_to_splice and node.name not in protected_nodes: 455 | # We don't want to remove nodes that have control edge inputs, because 456 | # they might be involved in subtle dependency issues that removing them 457 | # will jeopardize. 458 | if node.name not in node_names_with_control_input and len(node.input) > 0: 459 | names_to_splice[node.name] = node.input[0] 460 | 461 | # We also don't want to remove nodes which are used as control edge inputs. 462 | names_to_splice = {name: value for name, value in names_to_splice.items() 463 | if name not in control_input_names} 464 | 465 | nodes_after_splicing = [] 466 | for node in nodes_after_removal: 467 | if node.name in names_to_splice: 468 | continue 469 | new_node = node_def_pb2.NodeDef() 470 | new_node.CopyFrom(node) 471 | input_before_removal = node.input 472 | del new_node.input[:] 473 | for full_input_name in input_before_removal: 474 | input_name = re.sub(r"^\^", "", full_input_name) 475 | while input_name in names_to_splice: 476 | full_input_name = names_to_splice[input_name] 477 | input_name = re.sub(r"^\^", "", full_input_name) 478 | new_node.input.append(full_input_name) 479 | nodes_after_splicing.append(new_node) 480 | 481 | output_graph = graph_pb2.GraphDef() 482 | output_graph.node.extend(nodes_after_splicing) 483 | return output_graph 484 | 485 | def make_image_classifier(tfhub_module, image_dir, hparams, 486 | requested_image_size=None, saveModelDir=False): 487 | """Builds and trains a TensorFlow model for image classification. 488 | 489 | Args: 490 | tfhub_module: A Python string with the handle of the Hub module. 491 | image_dir: A Python string naming a directory with subdirectories of images, 492 | one per class. 493 | hparams: A HParams object with hyperparameters controlling the training. 
494 | requested_image_size: A Python integer controlling the size of images to 495 | feed into the Hub module. If the module has a fixed input size, this 496 | must be omitted or set to that same value. 497 | Returns: A tuple (model, labels, train_result, frozen_inference_graph). 498 | """ 499 | print("Using hparams:") 500 | for key, value in hparams._asdict().items(): 501 | print("\t{0} : {1}".format(key, value)) 502 | 503 | module_layer = hub.KerasLayer(tfhub_module, trainable=hparams.do_fine_tuning) 504 | 505 | image_size = _image_size_for_module(module_layer, requested_image_size) 506 | print("Using module {} with image size {}".format( 507 | tfhub_module, image_size)) 508 | train_data_and_size, valid_data_and_size, labels = _get_data_with_keras( 509 | image_dir, image_size, hparams.batch_size, hparams.validation_split, hparams.do_data_augmentation) 510 | print("Found", len(labels), "classes:", ", ".join(labels)) 511 | 512 | model = build_model(module_layer, hparams, image_size, len(labels)) 513 | 514 | # If we are fine-tuning, check and see if weights 515 | # already exist in the output directory. This way, a user 516 | # can simply run two consecutive training sessions: one without 517 | # fine-tuning, followed by another with it. 518 | if hparams.do_fine_tuning: 519 | if saveModelDir is not None: 520 | existingWeightsPath = os.path.join(saveModelDir, "saved_model_weights.h5") 521 | if os.path.exists(existingWeightsPath): 522 | print("Loading existing weights for fine-tuning") 523 | model.load_weights(existingWeightsPath) 524 | 525 | train_result = train_model(model, hparams, train_data_and_size, 526 | valid_data_and_size) 527 | 528 | # Tear down the model, set the learning phase to 0 (inference), and then re-create it. 529 | # 1 - Save model weights as Keras H5. 530 | 531 | tempDir = tempfile.gettempdir() 532 | tempModelWeightsFile = os.path.join(tempDir, "weights.h5") 533 | 534 | model.save_weights(tempModelWeightsFile) 535 | 536 | # 2 - Set training to 0 537 | 538 | K.clear_session() 539 | K.set_learning_phase(0) 540 | 541 | # 3 - Create model again 542 | 543 | model = build_model(module_layer, hparams, image_size, len(labels)) 544 | 545 | # 4 - Load model weights. 546 | 547 | model.load_weights(tempModelWeightsFile) 548 | 549 | # Clean up temp weights file 550 | os.remove(tempModelWeightsFile) 551 | 552 | # 5 - Pass model to lib.model_to_frozen_graph. 553 | frozen_inference_graph = model_to_frozen_graph(model) 554 | 555 | return model, labels, train_result, frozen_inference_graph 556 | -------------------------------------------------------------------------------- /training/train_all_models.cmd: -------------------------------------------------------------------------------- 1 | :: You can add more model types from here: https://tfhub.dev/s?module-type=image-classification&tf-version=tf2 2 | :: However, you must choose TensorFlow 2 models. V1 models will not work here. 3 | :: https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 4 | :: https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 5 | :: https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 6 | :: https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 7 | :: 8 | :: If you get a CUDA_OUT_OF_MEMORY crash, pass --batch_size NUMBER, reducing it until the error goes away (a reduced-batch example is sketched below). 9 | :: Google advises against using a batch size < 8. 10 | 11 | :: Note that we set all of our target epochs to over 9000. This is because the trainer just uses early stopping internally. 
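:: For example, a reduced-memory variant of the first run below might look like this (a sketch only; the paths and batch size are placeholders to adjust for your setup):
:: python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\mobilenet_v2_140_224 --labels_output_file %cd%\..\trained_models\mobilenet_v2_140_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --train_epochs 9001 --batch_size 8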
12 | 13 | :: Train Mobilenet V2 140 14 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\mobilenet_v2_140_224 --labels_output_file %cd%\..\trained_models\mobilenet_v2_140_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --tflite_output_file %cd%\..\trained_models\mobilenet_v2_140_224\saved_model.tflite --train_epochs 9001 --batch_size 32 --do_fine_tuning --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 15 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 16 | :: tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\mobilenet_v2_140_224 %cd%\..\trained_models\mobilenet_v2_140_224\web_model 17 | :: Or, for a quantized (1 byte) version 18 | :: tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\mobilenet_v2_140_224 %cd%\..\trained_models\mobilenet_v2_140_224\web_model_quantized --quantization_bytes 1 19 | 20 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 21 | Timeout /T 60 /Nobreak 22 | 23 | :: Train Resnet V2 50 24 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\resnet_v2_50_224 --labels_output_file %cd%\..\trained_models\resnet_v2_50_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 --tflite_output_file %cd%\..\trained_models\resnet_v2_50_224\saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 25 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 26 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\resnet_v2_50_224 %cd%\..\trained_models\resnet_v2_50_224\web_model 27 | :: Or, for a quantized (1 byte) version 28 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\resnet_v2_50_224 %cd%\..\trained_models\resnet_v2_50_224\web_model_quantized --quantization_bytes 1 29 | 30 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 
31 | Timeout /T 60 /Nobreak 32 | 33 | :: Train Inception V3 34 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\inception_v3_224 --labels_output_file %cd%\..\trained_models\inception_v3_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 --tflite_output_file %cd%\..\trained_models\inception_v3_224\saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 35 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 36 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\inception_v3_224 %cd%\..\trained_models\inception_v3_224\web_model 37 | :: Or, for a quantized (1 byte) version 38 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\inception_v3_224 %cd%\..\trained_models\inception_v3_224\web_model_quantized --quantization_bytes 1 39 | 40 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 41 | Timeout /T 60 /Nobreak 42 | 43 | :: Train NasNetMobile 44 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\nasnet_a_224 --labels_output_file %cd%\..\trained_models\nasnet_a_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 --tflite_output_file %cd%\..\trained_models\nasnet_a_224\saved_model.tflite --train_epochs 9001 --batch_size 24 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 45 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 46 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\nasnet_a_224 %cd%\..\trained_models\nasnet_a_224\web_model 47 | :: Or, for a quantized (1 byte) version 48 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\nasnet_a_224 %cd%\..\trained_models\nasnet_a_224\web_model_quantized --quantization_bytes 1 -------------------------------------------------------------------------------- /training/train_all_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # You can add more model types from here: https://tfhub.dev/s?module-type=image-classification&tf-version=tf2 3 | # However, you must choose TensorFlow 2 models. V1 models will not work here. 
4 | # https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 5 | # https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 6 | # https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 7 | # https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 8 | # 9 | # If you get a CUDA_OUT_OF_MEMORY crash, pass --batch_size NUMBER, reducing it until the error goes away. 10 | # Google advises against using a batch size < 8. 11 | 12 | # Note that we set all of our target epochs to over 9000. This is because the trainer just uses early stopping internally. 13 | 14 | # Train Mobilenet V2 140 15 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/mobilenet_v2_140_224 --labels_output_file $PWD/../trained_models/mobilenet_v2_140_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --tflite_output_file $PWD/../trained_models/mobilenet_v2_140_224/saved_model.tflite --train_epochs 9001 --batch_size 32 --do_fine_tuning --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 16 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 17 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/mobilenet_v2_140_224 $PWD/../trained_models/mobilenet_v2_140_224/web_model 18 | # Or, for a quantized (1 byte) version 19 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/mobilenet_v2_140_224 $PWD/../trained_models/mobilenet_v2_140_224/web_model_quantized --quantization_bytes 1 20 | 21 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 22 | sleep 60 23 | 24 | # Train Resnet V2 50 25 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/resnet_v2_50_224 --labels_output_file $PWD/../trained_models/resnet_v2_50_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 --tflite_output_file $PWD/../trained_models/resnet_v2_50_224/saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 26 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 27 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/resnet_v2_50_224 $PWD/../trained_models/resnet_v2_50_224/web_model 28 | # Or, for a quantized (1 byte) version 29 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/resnet_v2_50_224 $PWD/../trained_models/resnet_v2_50_224/web_model_quantized --quantization_bytes 1 30 | 31 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 
32 | sleep 60 33 | 34 | # Train Inception V3 35 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/inception_v3_224 --labels_output_file $PWD/../trained_models/inception_v3_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 --tflite_output_file $PWD/../trained_models/inception_v3_224/saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 36 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 37 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/inception_v3_224 $PWD/../trained_models/inception_v3_224/web_model 38 | # Or, for a quantized (1 byte) version 39 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/inception_v3_224 $PWD/../trained_models/inception_v3_224/web_model_quantized --quantization_bytes 1 40 | 41 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 42 | sleep 60 43 | 44 | # Train NasNetMobile 45 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/nasnet_a_224 --labels_output_file $PWD/../trained_models/nasnet_a_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 --tflite_output_file $PWD/../trained_models/nasnet_a_224/saved_model.tflite --train_epochs 9001 --batch_size 24 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 46 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 47 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/nasnet_a_224 $PWD/../trained_models/nasnet_a_224/web_model 48 | # Or, for a quantized (1 byte) version 49 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/nasnet_a_224 $PWD/../trained_models/nasnet_a_224/web_model_quantized --quantization_bytes 1 --------------------------------------------------------------------------------