├── .all-contributorsrc ├── .github └── workflows │ └── release.yaml ├── .gitignore ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── _art ├── nsfw_confusion93.png └── nsfw_detection.png ├── images └── ReadMe.md ├── nsfw_detector ├── __init__.py └── predict.py ├── requirements.txt ├── setup.py ├── tf1 ├── ReadMe.md ├── nsfw_detector │ ├── __init__.py │ └── keras_predict.py └── training │ ├── inceptionv3_transfer │ ├── callbacks.py │ ├── constants.py │ ├── generators.py │ ├── train_fine_tune.py │ └── train_initialization.py │ ├── mobilenetv2_transfer │ ├── callbacks.py │ ├── constants.py │ ├── generators.py │ ├── train_fine_tune.py │ └── train_initialization.py │ ├── self_clense.py │ └── visuals.py └── training ├── make_nsfw_model.py ├── make_nsfw_model_lib.py ├── train_all_models.cmd └── train_all_models.sh /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "projectName": "nsfw_model", 3 | "projectOwner": "GantMan", 4 | "files": [ 5 | "README.md" 6 | ], 7 | "imageSize": 100, 8 | "commit": true, 9 | "contributors": [ 10 | { 11 | "login": "GantMan", 12 | "name": "Gant Laborde", 13 | "avatar_url": "https://avatars0.githubusercontent.com/u/997157?v=4", 14 | "profile": "http://gantlaborde.com/", 15 | "contributions": [ 16 | "code", 17 | "doc", 18 | "ideas" 19 | ] 20 | }, 21 | { 22 | "login": "bedapudi6788", 23 | "name": "Bedapudi Praneeth", 24 | "avatar_url": "https://avatars2.githubusercontent.com/u/15898654?v=4", 25 | "profile": "http://bpraneeth.com", 26 | "contributions": [ 27 | "code", 28 | "ideas" 29 | ] 30 | }, 31 | { 32 | "login": "TechnikEmpire", 33 | "name": "Jesse Nicholson", 34 | "avatar_url": "https://avatars0.githubusercontent.com/u/11234763?v=4", 35 | "profile": "http://bpraneeth.com", 36 | "contributions": [ 37 | "code", 38 | "doc", 39 | "ideas" 40 | ] 41 | } 42 | ] 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Build and release packages 2 | 3 | on: 4 | pull_request: 5 | push: 6 | tags: 7 | - "*" 8 | 9 | jobs: 10 | # Build the source distribution for PyPI 11 | build_packages: 12 | name: Build packages 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | 18 | - uses: actions/setup-python@v4 19 | with: 20 | python-version: "3.9" 21 | 22 | - name: Build 23 | run: | 24 | python3 -m pip install --upgrade build 25 | python3 -m build 26 | 27 | - uses: actions/upload-artifact@v3 28 | with: 29 | path: dist/*.tar.gz 30 | 31 | # Create a GitHub release 32 | github_release: 33 | name: Create GitHub release 34 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 35 | needs: [build_packages] 36 | runs-on: ubuntu-latest 37 | permissions: 38 | contents: write 39 | 40 | steps: 41 | - uses: actions/checkout@v3 42 | with: 43 | fetch-depth: 0 44 | 45 | - uses: actions/download-artifact@v3 46 | with: 47 | name: artifact 48 | path: dist 49 | 50 | - name: "✏️ Generate release changelog" 51 | id: changelog 52 | uses: heinrichreimer/github-changelog-generator-action@v2.3 53 | with: 54 | filterByMilestone: false 55 | onlyLastTag: true 56 | pullRequests: true 57 | prWoLabels: true 58 | token: ${{ secrets.GITHUB_TOKEN }} 59 | verbose: true 60 | 61 | - name: Create GitHub release 62 | uses: softprops/action-gh-release@v1 63 | with: 64 | body: ${{ steps.changelog.outputs.changelog }} 65 | files: dist/**/* 66 | 67 | # Test PyPI 68 
| test_pypi_publish: 69 | name: Test publishing to PyPI 70 | needs: [build_packages] 71 | runs-on: ubuntu-latest 72 | 73 | steps: 74 | - uses: actions/download-artifact@v3 75 | with: 76 | name: artifact 77 | path: dist 78 | 79 | - uses: pypa/gh-action-pypi-publish@v1.6.4 80 | with: 81 | user: __token__ 82 | password: ${{ secrets.TEST_PYPI_TOKEN }} 83 | repository_url: https://test.pypi.org/legacy/ 84 | skip_existing: true 85 | 86 | # Publish to PyPI 87 | pypi_publish: 88 | name: Publish to PyPI 89 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') 90 | needs: [build_packages] 91 | runs-on: ubuntu-latest 92 | 93 | steps: 94 | - uses: actions/download-artifact@v3 95 | with: 96 | name: artifact 97 | path: dist 98 | 99 | - uses: pypa/gh-action-pypi-publish@v1.6.4 100 | with: 101 | user: __token__ 102 | password: ${{ secrets.PYPI_TOKEN }} 103 | print_hash: true 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.hdf5 2 | *.h5 3 | logs/ 4 | .vscode/ 5 | __pycache__ 6 | dist 7 | build 8 | *.egg-info 9 | images/** 10 | !images/ReadMe.md 11 | # Ignore my Visual Studio solution for editing python 12 | NSFWModel/** 13 | trained_models/** 14 | training/train_best_models.cmd 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ### Changed 11 | 12 | - Require Python 3.8+ 13 | - Require Tensorflow 2.2+ for non-macOS 14 | - Require Tensorflow 2.5+ for macOS via tensorflow_macos 15 | 16 | ### Fixed 17 | 18 | - Declares numpy dependency explicitly 19 | 20 | ### Removed 21 | 22 | - Dropped support for Python 3.7 and older 23 | - 3.7 is EOL in a few months and all others are already EOL 24 | 25 | ## [1.1.1] - 2021-12-26 26 | 27 | ### Changed 28 | 29 | - break out numpy (nd array) function 30 | - remove classic app run modes for argparse 31 | - turn down verbosity in image load via file 32 | 33 | ### Added 34 | 35 | - one more example in README for running 36 | 37 | ### Fixed 38 | 39 | - fix requirements for clean system (needs PIL) 40 | 41 | ## [1.2.0] - 2020-05-15 42 | 43 | ### Added 44 | 45 | - New model release 46 | 47 | ## [1.1.0] - 2020-03-03 48 | 49 | ### Changed 50 | 51 | - update to tensorflow 2.1.0 and updated mobilenet-based model 52 | 53 | ## [1.0.0] - 2019-04-04 54 | 55 | ### Added 56 | 57 | - initial creation 58 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | This project contains third-party copyrighted material under different licenses. 
2 | Except where otherwise explicitly stated, this project is licensed as follows: 3 | 4 | MIT License 5 | 6 | Copyright (c) 2020 The nsfw_model Developers 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![NSFW Detector logo](https://github.com/GantMan/nsfw_model/blob/master/_art/nsfw_detection.png?raw=true) 2 | 3 | # NSFW Detection Machine Learning Model 4 | 5 | [![All Contributors](https://img.shields.io/badge/all_contributors-2-orange.svg?style=flat-square)](#contributors) 6 | 7 | Trained on 60+ Gigs of data to identify: 8 | - `drawings` - safe for work drawings (including anime) 9 | - `hentai` - hentai and pornographic drawings 10 | - `neutral` - safe for work neutral images 11 | - `porn` - pornographic images, sexual acts 12 | - `sexy` - sexually explicit images, not pornography 13 | 14 | This model powers [NSFW JS](https://github.com/infinitered/nsfwjs) - [More Info](https://shift.infinite.red/avoid-nightmares-nsfw-js-ab7b176978b1) 15 | 16 | ## Current Status: 17 | 18 | 93% Accuracy with the following confusion matrix, based on Inception V3. 19 | ![nsfw confusion matrix](_art/nsfw_confusion93.png) 20 | 21 | ## Requirements: 22 | 23 | See [requirements.txt](requirements.txt). 24 | 25 | ## Usage 26 | 27 | For programmatic use of the library. 28 | 29 | ```python 30 | from nsfw_detector import predict 31 | model = predict.load_model('./nsfw_mobilenet2.224x224.h5') 32 | 33 | # Predict single image 34 | predict.classify(model, '2.jpg') 35 | # {'2.jpg': {'sexy': 4.3454722e-05, 'neutral': 0.00026579265, 'porn': 0.0007733492, 'hentai': 0.14751932, 'drawings': 0.85139805}} 36 | 37 | # Predict multiple images at once 38 | predict.classify(model, ['/Users/bedapudi/Desktop/2.jpg', '/Users/bedapudi/Desktop/6.jpg']) 39 | # {'2.jpg': {'sexy': 4.3454795e-05, 'neutral': 0.00026579312, 'porn': 0.0007733498, 'hentai': 0.14751942, 'drawings': 0.8513979}, '6.jpg': {'drawings': 0.004214506, 'hentai': 0.013342537, 'neutral': 0.01834045, 'porn': 0.4431829, 'sexy': 0.5209196}} 40 | 41 | # Predict for all images in a directory 42 | predict.classify(model, '/Users/bedapudi/Desktop/') 43 | 44 | ``` 45 | 46 | If you've installed the package or use the command-line this should work, too... 
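(`nsfw-predict` is the console entry point registered in `setup.py`; in the examples, `mobilenet_v2_140_224` stands in for the path of a downloaded SavedModel directory.)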
47 | 48 | ```sh 49 | # a single image 50 | nsfw-predict --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 51 | 52 | # an image directory 53 | nsfw-predict --saved_model_path mobilenet_v2_140_224 --image_source images 54 | 55 | # a single image (from code/CLI) 56 | python3 nsfw_detector/predict.py --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 57 | 58 | ``` 59 | 60 | 61 | ## Download 62 | Please feel free to use this model to help your products! 63 | 64 | If you'd like to [say thanks for creating this, I'll take a donation for hosting costs](https://www.paypal.me/GantLaborde). 65 | 66 | # Latest Models Zip (v1.1.0) 67 | https://github.com/GantMan/nsfw_model/releases/tag/1.1.0 68 | 69 | ### Original Inception v3 Model (v1.0) 70 | * [Keras 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.h5) 71 | * [TensorflowJS 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfwjs.zip) 72 | * [TensorflowJS Quantized 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/min_nsfwjs.zip) 73 | * [Tensorflow 299x299 Image Model](https://s3.amazonaws.com/ir_public/ai/nsfw_models/nsfw.299x299.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 74 | 75 | ### Original Mobilenet v2 Model (v1.0) 76 | * [Keras 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/nsfw_mobilenet2.224x224.h5) 77 | * [TensorflowJS 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TFJS_nsfw_mobilenet/tfjs_nsfw_mobilenet.zip) 78 | * [TensorflowJS Quantized 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TFJS_nsfw_mobilenet/tfjs_quant_nsfw_mobilenet.zip) 79 | * [Tensorflow 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TF_nsfw_mobilenet/nsfw_mobilenet.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 80 | * [Tensorflow Quantized 224x224 Image Model](https://s3.amazonaws.com/ir_public/nsfwjscdn/TF_nsfw_mobilenet/quant_nsfw_mobilenet.pb) - [Graph if Needed](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms#inspecting-graphs) 81 | 82 | ## PyTorch Version 83 | Kudos to the community for creating a PyTorch version with resnet! 84 | https://github.com/yangbisheng2009/nsfw-resnet 85 | 86 | ## TF1 Training Folder Contents 87 | Simple description of the scripts used to create this model: 88 | * `inceptionv3_transfer/` - Folder with all the code to train the Keras based Inception v3 transfer learning model. Includes `constants.py` for configuration, and two scripts for actual training/refinement. 89 | * `mobilenetv2_transfer/` - Folder with all the code to train the Keras based Mobilenet v2 transfer learning model. 90 | * `visuals.py` - The code to create the confusion matrix graphic 91 | * `self_clense.py` - If the training data has significant inaccuracy, `self_clense` helps cross validate errors in the training data in reasonable time. The better the model gets, the better you can use it to clean the training data manually. 
92 | 93 | _e.g._ 94 | ```bash 95 | cd training 96 | # Start with all locked transfer of Inception v3 97 | python inceptionv3_transfer/train_initialization.py 98 | 99 | # Continue training on model with fine-tuning 100 | python inceptionv3_transfer/train_fine_tune.py 101 | 102 | # Create a confusion matrix of the model 103 | python visuals.py 104 | ``` 105 | 106 | ## Extra Info 107 | There's no easy way to distribute the training data, but if you'd like to help with this model or train other models, get in touch with me and we can work together. 108 | 109 | Advancements in this model power the quantized TFJS module on https://nsfwjs.com/ 110 | 111 | My Twitter is [@GantLaborde](https://twitter.com/GantLaborde) - I'm a School Of AI Wizard New Orleans. I run the twitter account [@FunMachineLearn](https://twitter.com/FunMachineLearn) 112 | 113 | Learn more about [me](http://gantlaborde.com/) and the [company I work for](https://infinite.red/). 114 | 115 | Special thanks to the [nsfw_data_scraper](https://github.com/alexkimxyz/nsfw_data_scrapper) for the training data. If you're interested in a more detailed analysis of types of NSFW images, you could probably use this repo code with [this data](https://github.com/EBazarov/nsfw_data_source_urls). 116 | 117 | If you need React Native, Elixir, AI, or Machine Learning work, check in with us at [Infinite Red](https://infinite.red/), who make all these experiments possible. We're an amazing software consultancy worldwide! 118 | 119 | ## Cite 120 | ``` 121 | @misc{man, 122 | title={Deep NN for NSFW Detection}, 123 | url={https://github.com/GantMan/nsfw_model}, 124 | journal={GitHub}, 125 | author={Laborde, Gant}} 126 | ``` 127 | 128 | ## Contributors 129 | 130 | Thanks goes to these wonderful people ([emoji key](https://github.com/kentcdodds/all-contributors#emoji-key)): 131 | 132 | 133 | | [
<img src="https://avatars0.githubusercontent.com/u/997157?v=4" width="100px;"/><br /><sub><b>Gant Laborde</b></sub>](http://gantlaborde.com/)<br />[💻](https://github.com/GantMan/nsfw_model/commits?author=GantMan "Code") [📖](https://github.com/GantMan/nsfw_model/commits?author=GantMan "Documentation") [🤔](#ideas-GantMan "Ideas, Planning, & Feedback") | [<img src="https://avatars2.githubusercontent.com/u/15898654?v=4" width="100px;"/><br /><sub><b>Bedapudi Praneeth</b></sub>](http://bpraneeth.com)<br />
[💻](https://github.com/GantMan/nsfw_model/commits?author=bedapudi6788 "Code") [🤔](#ideas-bedapudi6788 "Ideas, Planning, & Feedback") | 134 | | :---: | :---: | 135 | 136 | 137 | This project follows the [all-contributors](https://github.com/kentcdodds/all-contributors) specification. Contributions of any kind welcome! 138 | -------------------------------------------------------------------------------- /_art/nsfw_confusion93.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GantMan/nsfw_model/699b6796a55604341fbfdffe2b27ced1d868c591/_art/nsfw_confusion93.png -------------------------------------------------------------------------------- /_art/nsfw_detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GantMan/nsfw_model/699b6796a55604341fbfdffe2b27ced1d868c591/_art/nsfw_detection.png -------------------------------------------------------------------------------- /images/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Images Setup 2 | 3 | Place a folder of images here for each class you want. The training scripts will automatically separate train/test/validation from each 4 | folder and will generate a new label for each folder. The labels will be written to the output model directory. Consult the .cmd and .sh training 5 | scripts in the training folder for full usage in the event that you are invoking the python scripts directly. 6 | 7 | You should pre-resize your images to the target network input size. Otherwise, your training times will be increased by several hours due to the 8 | preprocessing expense and gaining nothing from it. 9 | 10 | *nix users can just make symbolic links for each class here to avoid copying. 11 | 12 | Windows users can, with an admin command prompt, create symbolic links as well with the MKLINK command. Like so: 13 | 14 | `mklink /J link_name C:\real\folder\path` -------------------------------------------------------------------------------- /nsfw_detector/__init__.py: -------------------------------------------------------------------------------- 1 | # empty file for package import -------------------------------------------------------------------------------- /nsfw_detector/predict.py: -------------------------------------------------------------------------------- 1 | #! 
python 2 | 3 | import argparse 4 | import json 5 | from os import listdir 6 | from os.path import isfile, join, exists, isdir, abspath 7 | 8 | import numpy as np 9 | import tensorflow as tf 10 | from tensorflow import keras 11 | import tensorflow_hub as hub 12 | 13 | 14 | IMAGE_DIM = 224 # required/default image dimensionality 15 | 16 | def load_images(image_paths, image_size, verbose=True): 17 | ''' 18 | Function for loading images into numpy arrays for passing to model.predict 19 | inputs: 20 | image_paths: list of image paths to load 21 | image_size: size into which images should be resized 22 | verbose: show all of the image path and sizes loaded 23 | 24 | outputs: 25 | loaded_images: loaded images on which keras model can run predictions 26 | loaded_image_indexes: paths of images which the function is able to process 27 | 28 | ''' 29 | loaded_images = [] 30 | loaded_image_paths = [] 31 | 32 | if isdir(image_paths): 33 | parent = abspath(image_paths) 34 | image_paths = [join(parent, f) for f in listdir(image_paths) if isfile(join(parent, f))] 35 | elif isfile(image_paths): 36 | image_paths = [image_paths] 37 | 38 | for img_path in image_paths: 39 | try: 40 | if verbose: 41 | print(img_path, "size:", image_size) 42 | image = keras.preprocessing.image.load_img(img_path, target_size=image_size) 43 | image = keras.preprocessing.image.img_to_array(image) 44 | image /= 255 45 | loaded_images.append(image) 46 | loaded_image_paths.append(img_path) 47 | except Exception as ex: 48 | print("Image Load Failure: ", img_path, ex) 49 | 50 | return np.asarray(loaded_images), loaded_image_paths 51 | 52 | def load_model(model_path): 53 | if model_path is None or not exists(model_path): 54 | raise ValueError("saved_model_path must be the valid directory of a saved model to load.") 55 | 56 | model = tf.keras.models.load_model(model_path, custom_objects={'KerasLayer': hub.KerasLayer},compile=False) 57 | return model 58 | 59 | 60 | def classify(model, input_paths, image_dim=IMAGE_DIM, predict_args={}): 61 | """ 62 | Classify given a model, input paths (could be single string), and image dimensionality. 63 | 64 | Optionally, pass predict_args that will be passed to tf.keras.Model.predict(). 65 | """ 66 | images, image_paths = load_images(input_paths, (image_dim, image_dim)) 67 | probs = classify_nd(model, images, predict_args) 68 | return dict(zip(image_paths, probs)) 69 | 70 | 71 | def classify_nd(model, nd_images, predict_args={}): 72 | """ 73 | Classify given a model, image array (numpy) 74 | 75 | Optionally, pass predict_args that will be passed to tf.keras.Model.predict(). 
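
    nd_images is expected to be a float array of shape (N, dim, dim, 3)
    scaled to [0, 1], as produced by load_images above.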
76 | """ 77 | model_preds = model.predict(nd_images, **predict_args) 78 | # preds = np.argsort(model_preds, axis = 1).tolist() 79 | 80 | categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 81 | 82 | probs = [] 83 | for i, single_preds in enumerate(model_preds): 84 | single_probs = {} 85 | for j, pred in enumerate(single_preds): 86 | single_probs[categories[j]] = float(pred) 87 | probs.append(single_probs) 88 | return probs 89 | 90 | 91 | def main(args=None): 92 | parser = argparse.ArgumentParser( 93 | description="""A script to perform NFSW classification of images""", 94 | epilog=""" 95 | Launch with default model and a test image 96 | python nsfw_detector/predict.py --saved_model_path mobilenet_v2_140_224 --image_source test.jpg 97 | """, formatter_class=argparse.RawTextHelpFormatter) 98 | 99 | submain = parser.add_argument_group('main execution and evaluation functionality') 100 | submain.add_argument('--image_source', dest='image_source', type=str, required=True, 101 | help='A directory of images or a single image to classify') 102 | submain.add_argument('--saved_model_path', dest='saved_model_path', type=str, required=True, 103 | help='The model to load') 104 | submain.add_argument('--image_dim', dest='image_dim', type=int, default=IMAGE_DIM, 105 | help="The square dimension of the model's input shape") 106 | if args is not None: 107 | config = vars(parser.parse_args(args)) 108 | else: 109 | config = vars(parser.parse_args()) 110 | 111 | if config['image_source'] is None or not exists(config['image_source']): 112 | raise ValueError("image_source must be a valid directory with images or a single image to classify.") 113 | 114 | model = load_model(config['saved_model_path']) 115 | image_preds = classify(model, config['image_source'], config['image_dim']) 116 | print(json.dumps(image_preds, indent=2), '\n') 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow>=2.2.0;sys_platform != 'darwin' 2 | tensorflow_macos>=2.5.0;sys_platform == 'darwin' 3 | tensorflow-hub==0.12.0 4 | pillow 5 | 6 | numpy -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pip install twine 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'nsfw_detector' 16 | DESCRIPTION = 'NSFW Image Detection with Deep Learning' 17 | URL = 'https://github.com/GantMan/nsfw_model' 18 | EMAIL = 'gantman@gmail.com' 19 | AUTHOR = 'Gant Laborde' 20 | REQUIRES_PYTHON = '>=3.8.0' 21 | VERSION = '1.3.0' 22 | 23 | # What packages are optional? 24 | EXTRAS = { 25 | # 'fancy feature': ['django'], 26 | } 27 | 28 | # The rest you shouldn't have to touch too much :) 29 | # ------------------------------------------------ 30 | # Except, perhaps the License and Trove Classifiers! 31 | # If you do change the License, remember to change the Trove Classifier for that! 32 | 33 | here = os.path.abspath(os.path.dirname(__file__)) 34 | 35 | # Import the requirements. 
36 | REQUIRED = [] 37 | try: 38 | with io.open(os.path.join(here, 'requirements.txt'), encoding='utf-8') as f: 39 | for line_req in f: 40 | if line_req[0] != '#': 41 | REQUIRED.append(line_req.strip()) 42 | except FileNotFoundError: 43 | REQUIRED = [] 44 | 45 | # Import the README and use it as the long-description. 46 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 47 | try: 48 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 49 | long_description = '\n' + f.read() 50 | with io.open(os.path.join(here, 'CHANGELOG.md'), encoding='utf-8') as f: 51 | long_description = '\n' + f.read() 52 | except FileNotFoundError: 53 | long_description = DESCRIPTION 54 | 55 | # Load the package's __version__.py module as a dictionary. 56 | about = {} 57 | if not VERSION: 58 | with open(os.path.join(here, NAME, '__version__.py')) as f: 59 | exec(f.read(), about) 60 | else: 61 | about['__version__'] = VERSION 62 | 63 | 64 | class UploadCommand(Command): 65 | """Support setup.py upload.""" 66 | 67 | description = 'Build and publish the package.' 68 | user_options = [] 69 | 70 | @staticmethod 71 | def status(s): 72 | """Prints things in bold.""" 73 | print('\033[1m{0}\033[0m'.format(s)) 74 | 75 | def initialize_options(self): 76 | pass 77 | 78 | def finalize_options(self): 79 | pass 80 | 81 | def run(self): 82 | try: 83 | self.status('Removing previous builds…') 84 | rmtree(os.path.join(here, 'dist')) 85 | except OSError: 86 | pass 87 | 88 | self.status('Building Source and Wheel (universal) distribution…') 89 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 90 | 91 | self.status('Uploading the package to PyPI via Twine…') 92 | os.system('twine upload dist/*') 93 | 94 | self.status('Pushing git tags…') 95 | os.system('git tag v{0}'.format(about['__version__'])) 96 | os.system('git push --tags') 97 | 98 | sys.exit() 99 | 100 | 101 | # Where the magic happens: 102 | setup( 103 | name=NAME, 104 | version=about['__version__'], 105 | description=DESCRIPTION, 106 | long_description=long_description, 107 | long_description_content_type='text/markdown', 108 | author=AUTHOR, 109 | author_email=EMAIL, 110 | python_requires=REQUIRES_PYTHON, 111 | url=URL, 112 | packages=find_packages(exclude=('tests',)), 113 | # If your package is a single module, use this instead of 'packages': 114 | # py_modules=['mypackage'], 115 | 116 | # entry_points={ 117 | # 'console_scripts': ['mycli=mymodule:cli'], 118 | # }, 119 | install_requires=REQUIRED, 120 | extras_require=EXTRAS, 121 | include_package_data=True, 122 | license='MIT', 123 | classifiers=[ 124 | # Trove classifiers 125 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 126 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 127 | 'Programming Language :: Python', 128 | 'Programming Language :: Python :: 3', 129 | 'Programming Language :: Python :: 3.8', 130 | 'Programming Language :: Python :: 3.9', 131 | 'Programming Language :: Python :: 3.10', 132 | 'Programming Language :: Python :: 3.11', 133 | 'Programming Language :: Python :: Implementation :: CPython', 134 | 'Programming Language :: Python :: Implementation :: PyPy' 135 | ], 136 | # $ setup.py publish support. 
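    # `python setup.py upload` runs UploadCommand above: it rebuilds the
    # distributions, uploads them via twine, then tags and pushes the version.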
137 | cmdclass={ 138 | 'upload': UploadCommand 139 | }, 140 | entry_points=""" 141 | [console_scripts] 142 | nsfw-predict=nsfw_detector.predict:main 143 | """ 144 | ) 145 | -------------------------------------------------------------------------------- /tf1/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Deprecated Tensorflow 1.x Version 2 | 3 | This version of the nsfw_model training and validation code depends on Tensorflow 1.x and is deprecated. -------------------------------------------------------------------------------- /tf1/nsfw_detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .keras_predict import keras_predictor as NSFWDetector -------------------------------------------------------------------------------- /tf1/nsfw_detector/keras_predict.py: -------------------------------------------------------------------------------- 1 | import keras 2 | import numpy as np 3 | 4 | 5 | def load_images(image_paths, image_size): 6 | ''' 7 | Function for loading images into numpy arrays for passing to model.predict 8 | inputs: 9 | image_paths: list of image paths to load 10 | image_size: size into which images should be resized 11 | 12 | outputs: 13 | loaded_images: loaded images on which keras model can run predictions 14 | loaded_image_indexes: paths of images which the function is able to process 15 | 16 | ''' 17 | loaded_images = [] 18 | loaded_image_paths = [] 19 | 20 | for i, img_path in enumerate(image_paths): 21 | try: 22 | image = keras.preprocessing.image.load_img(img_path, target_size = image_size) 23 | image = keras.preprocessing.image.img_to_array(image) 24 | image /= 255 25 | loaded_images.append(image) 26 | loaded_image_paths.append(img_path) 27 | except Exception as ex: 28 | print(i, img_path, ex) 29 | 30 | return np.asarray(loaded_images), loaded_image_paths 31 | 32 | class keras_predictor(): 33 | ''' 34 | Class for loading model and running predictions. 35 | For example on how to use take a look the if __name__ == '__main__' part. 
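
    A minimal sketch:

        m = keras_predictor('./nsfw.299x299.h5')
        m.predict('test.jpg')  # a single path or a list of paths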
36 | ''' 37 | nsfw_model = None 38 | 39 | def __init__(self, model_path): 40 | ''' 41 | model = keras_predictor('path_to_weights') 42 | ''' 43 | keras_predictor.nsfw_model = keras.models.load_model(model_path) 44 | 45 | 46 | def predict(self, image_paths = [], batch_size = 32, image_size = (299, 299), categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy']): 47 | ''' 48 | inputs: 49 | image_paths: list of image paths or can be a string too (for single image) 50 | batch_size: batch_size for running predictions 51 | image_size: size to which the image needs to be resized 52 | categories: since the model predicts numbers, categories is the list of actual names of categories 53 | ''' 54 | if isinstance(image_paths, str): 55 | image_paths = [image_paths] 56 | 57 | loaded_images, loaded_image_paths = load_images(image_paths, image_size) 58 | 59 | if not loaded_image_paths: 60 | return {} 61 | 62 | model_preds = keras_predictor.nsfw_model.predict(loaded_images, batch_size = batch_size) 63 | 64 | preds = np.argsort(model_preds, axis = 1).tolist() 65 | 66 | probs = [] 67 | for i, single_preds in enumerate(preds): 68 | single_probs = [] 69 | for j, pred in enumerate(single_preds): 70 | single_probs.append(model_preds[i][pred]) 71 | preds[i][j] = categories[pred] 72 | 73 | probs.append(single_probs) 74 | 75 | 76 | images_preds = {} 77 | 78 | for i, loaded_image_path in enumerate(loaded_image_paths): 79 | images_preds[loaded_image_path] = {} 80 | for _ in range(len(preds[i])): 81 | images_preds[loaded_image_path][preds[i][_]] = probs[i][_] 82 | 83 | return images_preds 84 | 85 | 86 | if __name__ == '__main__': 87 | print('\n Enter path for the keras weights, leave empty to use "./nsfw.299x299.h5" \n') 88 | weights_path = input().strip() 89 | if not weights_path: weights_path = "../nsfw.299x299.h5" 90 | 91 | m = keras_predictor(weights_path) 92 | 93 | while 1: 94 | print('\n Enter single image path or multiple images seperated by || (2 pipes) \n') 95 | images = input().split('||') 96 | images = [image.strip() for image in images] 97 | print(m.predict(images), '\n') 98 | -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/callbacks.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler 2 | from time import time 3 | 4 | # Slow down training deeper into dataset 5 | def schedule(epoch): 6 | if epoch < 6: 7 | # Warmup model first 8 | return .0000032 9 | elif epoch < 12: 10 | return .01 11 | elif epoch < 20: 12 | return .002 13 | elif epoch < 40: 14 | return .0004 15 | elif epoch < 60: 16 | return .00008 17 | elif epoch < 80: 18 | return .000016 19 | elif epoch < 95: 20 | return .0000032 21 | else: 22 | return .0000009 23 | 24 | 25 | def make_callbacks(weights_file): 26 | # checkpoint 27 | filepath = weights_file 28 | checkpoint = ModelCheckpoint( 29 | filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 30 | 31 | # Update info 32 | tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 33 | 34 | # learning rate schedule 35 | lr_scheduler = LearningRateScheduler(schedule) 36 | 37 | # all the goodies 38 | return [lr_scheduler, checkpoint, tensorboard] -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/constants.py: -------------------------------------------------------------------------------- 1 | # Config 2 | SIZES = { 3 | 'basic': 299 4 
| } 5 | 6 | NUM_CHANNELS = 3 7 | NUM_CLASSES = 5 8 | GENERATOR_BATCH_SIZE = 32 9 | TOTAL_EPOCHS = 100 10 | STEPS_PER_EPOCH = 500 11 | VALIDATION_STEPS = 50 12 | BASE_DIR = 'D:\\nswf_model_training_data\\data' -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/generators.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | import constants 4 | 5 | train_datagen = ImageDataGenerator( 6 | rescale=1./255, 7 | rotation_range=30, 8 | width_shift_range=0.2, 9 | height_shift_range=0.2, 10 | shear_range=0.2, 11 | zoom_range=0.2, 12 | channel_shift_range=20, 13 | horizontal_flip=True, 14 | fill_mode='nearest' 15 | ) 16 | 17 | # Validation data should not be modified 18 | validation_datagen = ImageDataGenerator( 19 | rescale=1./255 20 | ) 21 | 22 | train_dir = os.path.join(constants.BASE_DIR, 'train') 23 | test_dir = os.path.join(constants.BASE_DIR, 'test') 24 | 25 | def create_generators(height, width): 26 | train_generator = train_datagen.flow_from_directory( 27 | train_dir, 28 | target_size=(height, width), 29 | class_mode='categorical', 30 | batch_size=constants.GENERATOR_BATCH_SIZE 31 | ) 32 | 33 | validation_generator = validation_datagen.flow_from_directory( 34 | test_dir, 35 | target_size=(height, width), 36 | class_mode='categorical', 37 | batch_size=constants.GENERATOR_BATCH_SIZE 38 | ) 39 | 40 | return[train_generator, validation_generator] -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/train_fine_tune.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.models import Sequential, Model, load_model 7 | 8 | # reusable stuff 9 | import constants 10 | import callbacks 11 | import generators 12 | 13 | # No kruft plz 14 | clear_session() 15 | 16 | # Config 17 | height = constants.SIZES['basic'] 18 | width = height 19 | weights_file = "weights.best_inception" + str(height) + ".hdf5" 20 | 21 | print ('Starting from last full model run') 22 | model = load_model("nsfw." 
+ str(width) + "x" + str(height) + ".h5") 23 | 24 | # Unlock a few layers deep in Inception v3 25 | model.trainable = False 26 | set_trainable = False 27 | for layer in model.layers: 28 | if layer.name == 'conv2d_56': 29 | set_trainable = True 30 | if set_trainable: 31 | layer.trainable = True 32 | else: 33 | layer.trainable = False 34 | 35 | # Let's see it 36 | print('Summary') 37 | print(model.summary()) 38 | 39 | # Load checkpoint if one is found 40 | if os.path.exists(weights_file): 41 | print ("loading ", weights_file) 42 | model.load_weights(weights_file) 43 | 44 | # Get all model callbacks 45 | callbacks_list = callbacks.make_callbacks(weights_file) 46 | 47 | print('Compile model') 48 | opt = SGD(momentum=.9) 49 | model.compile( 50 | loss='categorical_crossentropy', 51 | optimizer=opt, 52 | metrics=['accuracy'] 53 | ) 54 | 55 | # Get training/validation data via generators 56 | train_generator, validation_generator = generators.create_generators(height, width) 57 | 58 | print('Start training!') 59 | history = model.fit_generator( 60 | train_generator, 61 | callbacks=callbacks_list, 62 | epochs=constants.TOTAL_EPOCHS, 63 | steps_per_epoch=constants.STEPS_PER_EPOCH, 64 | shuffle=True, 65 | workers=4, 66 | use_multiprocessing=False, 67 | validation_data=validation_generator, 68 | validation_steps=constants.VALIDATION_STEPS 69 | ) 70 | 71 | # Save it for later 72 | print('Saving Model') 73 | model.save("nsfw." + str(width) + "x" + str(height) + ".h5") 74 | -------------------------------------------------------------------------------- /tf1/training/inceptionv3_transfer/train_initialization.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.applications import InceptionV3 7 | from keras.models import Sequential, Model, load_model 8 | from keras.layers import Dense, Dropout, Flatten, AveragePooling2D 9 | from keras import initializers, regularizers 10 | 11 | # reusable stuff 12 | import constants 13 | import callbacks 14 | import generators 15 | 16 | # No kruft plz 17 | clear_session() 18 | 19 | # Config 20 | height = constants.SIZES['basic'] 21 | width = height 22 | weights_file = "weights.best_inception" + str(height) + ".hdf5" 23 | 24 | conv_base = InceptionV3( 25 | weights='imagenet', 26 | include_top=False, 27 | input_shape=(height, width, constants.NUM_CHANNELS) 28 | ) 29 | 30 | # First time run, no unlocking 31 | conv_base.trainable = False 32 | 33 | # Let's see it 34 | print('Summary') 35 | print(conv_base.summary()) 36 | 37 | # Let's construct that top layer replacement 38 | x = conv_base.output 39 | x = AveragePooling2D(pool_size=(8, 8))(x) 40 | x - Dropout(0.4)(x) 41 | x = Flatten()(x) 42 | x = Dense(256, activation='relu', kernel_initializer=initializers.he_normal(seed=None), kernel_regularizer=regularizers.l2(.0005))(x) 43 | x = Dropout(0.5)(x) 44 | # Essential to have another layer for better accuracy 45 | x = Dense(128,activation='relu', kernel_initializer=initializers.he_normal(seed=None))(x) 46 | x = Dropout(0.25)(x) 47 | predictions = Dense(constants.NUM_CLASSES, kernel_initializer="glorot_uniform", activation='softmax')(x) 48 | 49 | print('Stacking New Layers') 50 | model = Model(inputs = conv_base.input, outputs=predictions) 51 | 52 | # Load checkpoint if one is found 53 | if os.path.exists(weights_file): 54 | print ("loading ", 
weights_file) 55 | model.load_weights(weights_file) 56 | 57 | # Get all model callbacks 58 | callbacks_list = callbacks.make_callbacks(weights_file) 59 | 60 | print('Compile model') 61 | # originally adam, but research says SGD with scheduler 62 | # opt = Adam(lr=0.001, amsgrad=True) 63 | opt = SGD(momentum=.9) 64 | model.compile( 65 | loss='categorical_crossentropy', 66 | optimizer=opt, 67 | metrics=['accuracy'] 68 | ) 69 | 70 | # Get training/validation data via generators 71 | train_generator, validation_generator = generators.create_generators(height, width) 72 | 73 | print('Start training!') 74 | history = model.fit_generator( 75 | train_generator, 76 | callbacks=callbacks_list, 77 | epochs=constants.TOTAL_EPOCHS, 78 | steps_per_epoch=constants.STEPS_PER_EPOCH, 79 | shuffle=True, 80 | workers=4, 81 | use_multiprocessing=False, 82 | validation_data=validation_generator, 83 | validation_steps=constants.VALIDATION_STEPS 84 | ) 85 | 86 | # Save it for later 87 | print('Saving Model') 88 | model.save("nsfw." + str(width) + "x" + str(height) + ".h5") 89 | -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/callbacks.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import ModelCheckpoint, TensorBoard, LearningRateScheduler 2 | from time import time 3 | 4 | # Slow down training deeper into dataset 5 | def schedule(epoch): 6 | if epoch < 6: 7 | # Warmup model first 8 | return .0000032 9 | elif epoch < 12: 10 | return .01 11 | elif epoch < 20: 12 | return .002 13 | elif epoch < 40: 14 | return .0004 15 | elif epoch < 60: 16 | return .00008 17 | elif epoch < 80: 18 | return .000016 19 | elif epoch < 95: 20 | return .0000032 21 | else: 22 | return .0000009 23 | 24 | 25 | def make_callbacks(weights_file): 26 | # checkpoint 27 | filepath = weights_file 28 | checkpoint = ModelCheckpoint( 29 | filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') 30 | 31 | # Update info 32 | tensorboard = TensorBoard(log_dir="logs/{}".format(time())) 33 | 34 | # learning rate schedule 35 | lr_scheduler = LearningRateScheduler(schedule) 36 | 37 | # all the goodies 38 | return [lr_scheduler, checkpoint, tensorboard] -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/constants.py: -------------------------------------------------------------------------------- 1 | # Config 2 | SIZES = { 3 | 'basic': 224 4 | } 5 | 6 | NUM_CHANNELS = 3 7 | NUM_CLASSES = 5 8 | GENERATOR_BATCH_SIZE = 32 9 | TOTAL_EPOCHS = 100 10 | STEPS_PER_EPOCH = 500 11 | VALIDATION_STEPS = 50 12 | BASE_DIR = 'D:\\nswf_model_training_data\\data' -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/generators.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | import constants 4 | 5 | train_datagen = ImageDataGenerator( 6 | rescale=1./255, 7 | rotation_range=30, 8 | width_shift_range=0.2, 9 | height_shift_range=0.2, 10 | shear_range=0.2, 11 | zoom_range=0.2, 12 | channel_shift_range=20, 13 | horizontal_flip=True, 14 | fill_mode='nearest' 15 | ) 16 | 17 | # Validation data should not be modified 18 | validation_datagen = ImageDataGenerator( 19 | rescale=1./255 20 | ) 21 | 22 | train_dir = os.path.join(constants.BASE_DIR, 'train') 23 | test_dir = os.path.join(constants.BASE_DIR, 
'test') 24 | 25 | def create_generators(height, width): 26 | train_generator = train_datagen.flow_from_directory( 27 | train_dir, 28 | target_size=(height, width), 29 | class_mode='categorical', 30 | batch_size=constants.GENERATOR_BATCH_SIZE 31 | ) 32 | 33 | validation_generator = validation_datagen.flow_from_directory( 34 | test_dir, 35 | target_size=(height, width), 36 | class_mode='categorical', 37 | batch_size=constants.GENERATOR_BATCH_SIZE 38 | ) 39 | 40 | return[train_generator, validation_generator] -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/train_fine_tune.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.models import Sequential, Model, load_model 7 | 8 | # reusable stuff 9 | import constants 10 | import callbacks 11 | import generators 12 | 13 | # No kruft plz 14 | clear_session() 15 | import tensorflow as tf 16 | from keras.backend.tensorflow_backend import set_session 17 | config = tf.ConfigProto() 18 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 19 | sess = tf.Session(config=config) 20 | set_session(sess) # set this TensorFlow session as the default session for Keras 21 | 22 | # Config 23 | height = constants.SIZES['basic'] 24 | width = height 25 | weights_file = "weights.best_mobilenet" + str(height) + ".hdf5" 26 | 27 | print ('Starting from last full model run') 28 | model = load_model("nsfwnsfw_mobilenet2." + str(width) + "x" + str(height) + ".h5") 29 | 30 | # Unlock a few layers deep in Mobilenet v2 31 | model.trainable = False 32 | set_trainable = False 33 | for layer in model.layers: 34 | if layer.name == 'block_11_expand': 35 | set_trainable = True 36 | if set_trainable: 37 | layer.trainable = True 38 | else: 39 | layer.trainable = False 40 | 41 | # Let's see it 42 | print('Summary') 43 | print(model.summary()) 44 | 45 | # Load checkpoint if one is found 46 | if os.path.exists(weights_file): 47 | print ("loading ", weights_file) 48 | model.load_weights(weights_file) 49 | 50 | # Get all model callbacks 51 | callbacks_list = callbacks.make_callbacks(weights_file) 52 | 53 | print('Compile model') 54 | opt = SGD(momentum=.9) 55 | model.compile( 56 | loss='categorical_crossentropy', 57 | optimizer=opt, 58 | metrics=['accuracy'] 59 | ) 60 | 61 | # Get training/validation data via generators 62 | train_generator, validation_generator = generators.create_generators(height, width) 63 | 64 | print('Start training!') 65 | history = model.fit_generator( 66 | train_generator, 67 | callbacks=callbacks_list, 68 | epochs=constants.TOTAL_EPOCHS, 69 | steps_per_epoch=constants.STEPS_PER_EPOCH, 70 | shuffle=True, 71 | workers=4, 72 | use_multiprocessing=False, 73 | validation_data=validation_generator, 74 | validation_steps=constants.VALIDATION_STEPS 75 | ) 76 | 77 | # Save it for later 78 | print('Saving Model') 79 | model.save("nsfwnsfw_mobilenet2." 
+ str(width) + "x" + str(height) + ".h5") 80 | -------------------------------------------------------------------------------- /tf1/training/mobilenetv2_transfer/train_initialization.py: -------------------------------------------------------------------------------- 1 | import os 2 | from keras.preprocessing.image import ImageDataGenerator 3 | from keras.backend import clear_session 4 | from keras.optimizers import SGD 5 | from pathlib import Path 6 | from keras.applications.mobilenet_v2 import MobileNetV2 7 | from keras.models import Sequential, Model, load_model 8 | from keras.layers import Dense, Dropout, Flatten, AveragePooling2D 9 | from keras import initializers, regularizers 10 | 11 | # reusable stuff 12 | import constants 13 | import callbacks 14 | import generators 15 | 16 | # No kruft plz 17 | clear_session() 18 | import tensorflow as tf 19 | from keras.backend.tensorflow_backend import set_session 20 | config = tf.ConfigProto() 21 | config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU 22 | sess = tf.Session(config=config) 23 | set_session(sess) # set this TensorFlow session as the default session for Keras 24 | 25 | # Config 26 | height = constants.SIZES['basic'] 27 | width = height 28 | weights_file = "weights.best_mobilenet" + str(height) + ".hdf5" 29 | 30 | conv_base = MobileNetV2( 31 | weights='imagenet', 32 | include_top=False, 33 | input_shape=(height, width, constants.NUM_CHANNELS) 34 | ) 35 | 36 | # First time run, no unlocking 37 | conv_base.trainable = False 38 | 39 | # Let's see it 40 | print('Summary') 41 | print(conv_base.summary()) 42 | 43 | # Let's construct that top layer replacement 44 | x = conv_base.output 45 | x = AveragePooling2D(pool_size=(7, 7))(x) 46 | x = Flatten()(x) 47 | x = Dense(256, activation='relu', kernel_initializer=initializers.he_normal(seed=None), kernel_regularizer=regularizers.l2(.0005))(x) 48 | x = Dropout(0.5)(x) 49 | # Essential to have another layer for better accuracy 50 | x = Dense(128,activation='relu', kernel_initializer=initializers.he_normal(seed=None))(x) 51 | x = Dropout(0.25)(x) 52 | predictions = Dense(constants.NUM_CLASSES, kernel_initializer="glorot_uniform", activation='softmax')(x) 53 | 54 | print('Stacking New Layers') 55 | model = Model(inputs = conv_base.input, outputs=predictions) 56 | 57 | # Load checkpoint if one is found 58 | if os.path.exists(weights_file): 59 | print ("loading ", weights_file) 60 | model.load_weights(weights_file) 61 | 62 | # Get all model callbacks 63 | callbacks_list = callbacks.make_callbacks(weights_file) 64 | 65 | print('Compile model') 66 | # originally adam, but research says SGD with scheduler 67 | # opt = Adam(lr=0.001, amsgrad=True) 68 | opt = SGD(momentum=.9) 69 | model.compile( 70 | loss='categorical_crossentropy', 71 | optimizer=opt, 72 | metrics=['accuracy'] 73 | ) 74 | 75 | # Get training/validation data via generators 76 | train_generator, validation_generator = generators.create_generators(height, width) 77 | 78 | print('Start training!') 79 | history = model.fit_generator( 80 | train_generator, 81 | callbacks=callbacks_list, 82 | epochs=constants.TOTAL_EPOCHS, 83 | steps_per_epoch=constants.STEPS_PER_EPOCH, 84 | shuffle=True, 85 | workers=4, 86 | use_multiprocessing=False, 87 | validation_data=validation_generator, 88 | validation_steps=constants.VALIDATION_STEPS 89 | ) 90 | 91 | # Save it for later 92 | print('Saving Model') 93 | model.save("nsfw_mobilenet2." 
+ str(width) + "x" + str(height) + ".h5") 94 | -------------------------------------------------------------------------------- /tf1/training/self_clense.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from keras.preprocessing import image 4 | from pathlib import Path 5 | from keras.models import load_model 6 | from sklearn.metrics import confusion_matrix, classification_report 7 | 8 | # Initialize 9 | model = load_model("nsfw.299x299.h5") 10 | image_size = 299 11 | file_count = 0 12 | x_test = [] 13 | y_test = [] 14 | mistakes = [] 15 | categories = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 16 | base_dir = 'D:\\nswf_model_training_data\\data' 17 | batch_size = 1000 18 | page = 0 19 | 20 | # CONFIGURE EACH RUN 21 | group = 'train' 22 | category_id = 4 23 | mistaken_as = 2 24 | file_type = "jpg" 25 | 26 | 27 | def process_batch(batch_x, batch_y): 28 | print("Batch Check " + str(file_count)) 29 | # Convert the list of images to a numpy array 30 | x_array = np.array(batch_x) 31 | 32 | # Make predictions (arrays of size 5, with probabilities) 33 | predictions = model.predict(x_array) 34 | max_predictions = np.argmax(predictions, axis=1) 35 | 36 | for idx, prediction in enumerate(max_predictions): 37 | if prediction != category_id: 38 | # We have a mistake! Do we log it? 39 | if prediction == mistaken_as: 40 | mistakes.append(batch_y[idx]) 41 | 42 | # Copies categorization failures to the mistakes folder for analysis 43 | def copy_all_failures(): 44 | for file_info in mistakes: 45 | os.rename(file_info["path"], base_dir + "\\" + group + "\\mistakes\\" + str(file_info["filename"])) 46 | 47 | print("Starting Self-clense for " + categories[category_id]) 48 | # Load the data set by looping over every image file in path 49 | for image_file in Path(base_dir + "\\" + group + "\\" + 50 | categories[category_id]).glob("**/*." 
+ file_type): 51 | file_info = {"path": image_file, "filename": os.path.basename(image_file)} 52 | 53 | top = (page + 1) * batch_size 54 | file_count += 1 55 | 56 | # Load the current image file 57 | image_data = image.load_img(image_file, target_size=(image_size, image_size)) 58 | 59 | # Convert the loaded image file to a numpy array 60 | image_array = image.img_to_array(image_data) 61 | image_array /= 255 62 | 63 | # Add the current image to our list of test images 64 | x_test.append(image_array) 65 | # To identify failed predictions 66 | y_test.append(file_info) 67 | 68 | # Kick off a processing to clear RAM 69 | if file_count == top: 70 | process_batch(x_test, y_test) 71 | # move next batch moment 72 | page += 1 73 | # reset in-memory 74 | x_test = [] 75 | y_test = [] 76 | 77 | process_batch(x_test, y_test) 78 | copy_all_failures() 79 | print('Out of ' + str(file_count) + ' images of "' + str(categories[category_id]) + '" ' + str(len(mistakes)) + ' are mistaken as "' + str(categories[mistaken_as]) + '"') -------------------------------------------------------------------------------- /tf1/training/visuals.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import matplotlib.pyplot as plt 3 | import os 4 | import numpy as np 5 | from keras.preprocessing import image 6 | from pathlib import Path 7 | from keras.models import load_model 8 | from sklearn.metrics import confusion_matrix, classification_report 9 | 10 | model = load_model("nsfw.299x299.h5") 11 | test_dir = 'D:\\nswf_model_training_data\\data\\test' 12 | image_size = 299 13 | x_test = [] 14 | y_test = [] 15 | file_count = 0 16 | update_frequency = 1000 17 | 18 | class_names = ['drawings', 'hentai', 'neutral', 'porn', 'sexy'] 19 | 20 | for image_file in Path(test_dir).glob("**/*.jpg"): 21 | file_count += 1 22 | # Load the current image file 23 | image_data = image.load_img(image_file, target_size=(image_size, image_size)) 24 | 25 | # Convert the loaded image file to a numpy array 26 | image_array = image.img_to_array(image_data) 27 | image_array /= 255 28 | 29 | # Add to list of test images 30 | x_test.append(image_array) 31 | # Now add answer derived from folder 32 | path_name = os.path.dirname(image_file) 33 | folder_name = os.path.basename(path_name) 34 | y_test.append(class_names.index(folder_name)) 35 | 36 | if file_count % update_frequency == 0: 37 | print("Processed " + str(file_count) + " - Current Folder: " + folder_name) 38 | 39 | 40 | def plot_confusion_matrix(cm, classes, 41 | normalize=False, 42 | title='Confusion matrix', 43 | cmap=plt.cm.get_cmap('Blues')): 44 | """ 45 | This function prints and plots the confusion matrix. 46 | Normalization can be applied by setting `normalize=True`. 47 | """ 48 | if normalize: 49 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 50 | print("Normalized confusion matrix") 51 | else: 52 | print('Confusion matrix, without normalization') 53 | 54 | print(cm) 55 | 56 | plt.imshow(cm, interpolation='nearest', cmap=cmap) 57 | plt.title(title) 58 | plt.colorbar() 59 | tick_marks = np.arange(len(classes)) 60 | plt.xticks(tick_marks, classes, rotation=45) 61 | plt.yticks(tick_marks, classes) 62 | 63 | fmt = '.2f' if normalize else 'd' 64 | thresh = cm.max() / 2. 
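    # Annotate each cell with its count (or fraction, when normalized),
    # flipping the text color for contrast against the colormap.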
65 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 66 | plt.text(j, i, format(cm[i, j], fmt), 67 | horizontalalignment="center", 68 | color="white" if cm[i, j] > thresh else "black") 69 | 70 | plt.ylabel('True label') 71 | plt.xlabel('Predicted label') 72 | plt.tight_layout() 73 | 74 | x_test = np.array(x_test) 75 | predictions = model.predict(x_test) 76 | y_pred = np.argmax(predictions, axis=1) 77 | 78 | # Compute confusion matrix 79 | cnf_matrix = confusion_matrix(y_test, y_pred) 80 | np.set_printoptions(precision=2) 81 | 82 | # Plot normalized confusion matrix 83 | plt.figure() 84 | plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, 85 | title='Normalized confusion matrix') 86 | 87 | plt.show() -------------------------------------------------------------------------------- /training/make_nsfw_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Trains a TensorFlow model based on directories of images. 16 | 17 | This program builds, trains and exports a TensorFlow 2.x model that classifies 18 | natural images (photos) into a fixed set of classes. The classes are learned 19 | from a user-supplied dataset of images, stored as a directory of subdirectories 20 | of JPEG images, each subdirectory representing one class. 21 | 22 | The model is built from a pre-trained image feature vector module from 23 | TensorFlow Hub (in its TF2/SavedModel format, not the older hub.Module format) 24 | followed by a linear classifier. The linear classifier, and optionally also 25 | the TF Hub module, are trained on the new dataset. TF Hub offers a variety of 26 | suitable modules with various size/accuracy tradeoffs. 27 | 28 | The resulting model can be exported in TensorFlow's standard SavedModel format 29 | and as a .tflite file for deployment to mobile devices with TensorFlow Lite. 30 | TODO(b/139467904): Add support for post-training model optimization. 31 | 32 | For more information, please see the README file next to the source code, 33 | https://github.com/tensorflow/hub/blob/master/tensorflow_hub/tools/make_image_classifier/README.md 34 | """ 35 | 36 | # NOTE: This is an expanded, command-line version of 37 | # https://github.com/tensorflow/hub/blob/master/examples/colab/tf2_image_retraining.ipynb 38 | # PLEASE KEEP THEM IN SYNC, such that running tests for this program 39 | # provides assurance that the code in the colab notebook works. 
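# Example invocation (hypothetical paths; all flags are defined below):
#   python make_nsfw_model.py --image_dir images --image_size 224 \
#       --saved_model_dir trained_models/saved_model --labels_output_file labels.txt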
40 | 41 | from __future__ import absolute_import 42 | from __future__ import division 43 | from __future__ import print_function 44 | from __future__ import unicode_literals 45 | from pathlib import Path 46 | from absl import app 47 | from absl import flags 48 | from absl import logging 49 | from tensorflow import keras 50 | from tensorflow.core.framework import attr_value_pb2 51 | from tensorflow.core.framework import graph_pb2 52 | from tensorflow.core.framework import node_def_pb2 53 | from tensorflow.keras import layers 54 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 55 | from tensorflow.python.framework import dtypes 56 | from tensorflow.python.framework import ops 57 | from tensorflow.python.framework import tensor_util 58 | from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 59 | from tensorflow.python.platform import tf_logging as logging 60 | from tensorflow.python.util import deprecation 61 | from tensorflow.python.util.tf_export import tf_export 62 | import collections 63 | import copy 64 | import make_nsfw_model_lib as lib 65 | import numpy as np 66 | import os 67 | import re 68 | import six 69 | import tempfile 70 | import tensorflow as tf 71 | import tensorflow.keras.backend as K 72 | import tensorflow_hub as hub 73 | 74 | _DEFAULT_HPARAMS = lib.get_default_hparams() 75 | 76 | flags.DEFINE_string( 77 | "image_dir", None, 78 | "A directory with subdirectories of images, one per class. " 79 | "If unset, the TensorFlow Flowers example dataset will be used. " 80 | "Internally, the dataset is split into training and validation pieces.") 81 | flags.DEFINE_string( 82 | "tfhub_module", 83 | "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4", 84 | "Which TF Hub module to use. Must be a module in TF2/SavedModel format " 85 | "for computing image feature vectors.") 86 | flags.DEFINE_integer( 87 | "image_size", None, 88 | "The height and width of images to feed into --tfhub_module. " 89 | "(For now, must be set manually for modules with variable input size.)") 90 | flags.DEFINE_string( 91 | "saved_model_dir", None, 92 | "The final model is exported as a SavedModel directory with this name.") 93 | flags.DEFINE_string( 94 | "tflite_output_file", None, 95 | "The final model is exported as a .tflite flatbuffers file with this name.") 96 | flags.DEFINE_string( 97 | "labels_output_file", None, 98 | "Where to save the labels (that is, names of image subdirectories). " 99 | "The lines in this file appear in the same order as the predictions " 100 | "of the model.") 101 | flags.DEFINE_float( 102 | "assert_accuracy_at_least", None, 103 | "If set, the program fails if the validation accuracy at the end of " 104 | "training is less than this number (between 0 and 1), and no export of " 105 | "the trained model happens.") 106 | flags.DEFINE_integer( 107 | "train_epochs", _DEFAULT_HPARAMS.train_epochs, 108 | "Training will do this many iterations over the dataset.") 109 | flags.DEFINE_bool( 110 | "do_fine_tuning", _DEFAULT_HPARAMS.do_fine_tuning, 111 | "If set, the --tfhub_module is trained together with the rest of " 112 | "the model being built.") 113 | flags.DEFINE_integer( 114 | "batch_size", _DEFAULT_HPARAMS.batch_size, 115 | "Each training step samples a batch of this many images " 116 | "from the training data. (You may need to shrink this when using a GPU " 117 | "and getting out-of-memory errors. 
Avoid values below 8 when re-training " 118 | "modules that use batch normalization.)") 119 | flags.DEFINE_float( 120 | "learning_rate", _DEFAULT_HPARAMS.learning_rate, 121 | "The learning rate to use for gradient descent training.") 122 | flags.DEFINE_float( 123 | "momentum", _DEFAULT_HPARAMS.momentum, 124 | "The momentum parameter to use for gradient descent training.") 125 | flags.DEFINE_float( 126 | "dropout_rate", _DEFAULT_HPARAMS.dropout_rate, 127 | "The fraction of the input units to drop, used in dropout layer.") 128 | flags.DEFINE_bool( 129 | "is_deprecated_tfhub_module", False, 130 | "Whether or not the supplied TF Hub module is an older module from TensorFlow 1.") 131 | flags.DEFINE_float( 132 | "label_smoothing", _DEFAULT_HPARAMS.label_smoothing, 133 | "The degree of label smoothing to use.") 134 | flags.DEFINE_float( 135 | "validation_split", _DEFAULT_HPARAMS.validation_split, 136 | "The fraction of data to use for validation.") 137 | flags.DEFINE_string( 138 | 'optimizer', _DEFAULT_HPARAMS.optimizer, 139 | 'The name of the optimizer, one of "adadelta", "adagrad", "adam",' 140 | '"ftrl", "sgd" or "rmsprop".') 141 | flags.DEFINE_float( 142 | 'adadelta_rho', _DEFAULT_HPARAMS.adadelta_rho, 143 | 'The decay rate for adadelta.') 144 | flags.DEFINE_float( 145 | 'adagrad_initial_accumulator_value', _DEFAULT_HPARAMS.adagrad_initial_accumulator_value, 146 | 'Starting value for the AdaGrad accumulators.') 147 | flags.DEFINE_float( 148 | 'adam_beta1', _DEFAULT_HPARAMS.adam_beta1, 149 | 'The exponential decay rate for the 1st moment estimates.') 150 | flags.DEFINE_float( 151 | 'adam_beta2', _DEFAULT_HPARAMS.adam_beta2, 152 | 'The exponential decay rate for the 2nd moment estimates.') 153 | flags.DEFINE_float('opt_epsilon', _DEFAULT_HPARAMS.opt_epsilon, 'Epsilon term for the optimizer.') 154 | flags.DEFINE_float('ftrl_learning_rate_power', _DEFAULT_HPARAMS.ftrl_learning_rate_power, 155 | 'The learning rate power.') 156 | flags.DEFINE_float( 157 | 'ftrl_initial_accumulator_value', _DEFAULT_HPARAMS.ftrl_initial_accumulator_value, 158 | 'Starting value for the FTRL accumulators.') 159 | flags.DEFINE_float( 160 | 'ftrl_l1', _DEFAULT_HPARAMS.ftrl_l1, 'The FTRL l1 regularization strength.') 161 | 162 | flags.DEFINE_float( 163 | 'ftrl_l2', _DEFAULT_HPARAMS.ftrl_l2, 'The FTRL l2 regularization strength.') 164 | flags.DEFINE_float('rmsprop_momentum', _DEFAULT_HPARAMS.rmsprop_momentum, 'Momentum.') 165 | flags.DEFINE_float('rmsprop_decay', _DEFAULT_HPARAMS.rmsprop_decay, 'Decay term for RMSProp.') 166 | flags.DEFINE_bool( 167 | "do_data_augmentation", False, 168 | "Whether or not to do data augmentation.") 169 | flags.DEFINE_bool( 170 | "use_mixed_precision", False, 171 | "Whether or not to use NVIDIA mixed precision. 
Requires an NVIDIA card with at least compute capability 7.0.") 172 | 173 | FLAGS = flags.FLAGS 174 | 175 | 176 | def _get_hparams_from_flags(): 177 | """Creates an HParams object from the parsed flags.""" 178 | return lib.HParams( 179 | train_epochs=FLAGS.train_epochs, 180 | do_fine_tuning=FLAGS.do_fine_tuning, 181 | batch_size=FLAGS.batch_size, 182 | learning_rate=FLAGS.learning_rate, 183 | momentum=FLAGS.momentum, 184 | dropout_rate=FLAGS.dropout_rate, 185 | label_smoothing=FLAGS.label_smoothing, 186 | validation_split=FLAGS.validation_split, 187 | optimizer=FLAGS.optimizer, 188 | adadelta_rho=FLAGS.adadelta_rho, 189 | adagrad_initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value, 190 | adam_beta1=FLAGS.adam_beta1, 191 | adam_beta2=FLAGS.adam_beta2, 192 | opt_epsilon=FLAGS.opt_epsilon, 193 | ftrl_learning_rate_power=FLAGS.ftrl_learning_rate_power, 194 | ftrl_initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, 195 | ftrl_l1=FLAGS.ftrl_l1, 196 | ftrl_l2=FLAGS.ftrl_l2, 197 | rmsprop_momentum=FLAGS.rmsprop_momentum, 198 | rmsprop_decay=FLAGS.rmsprop_decay, 199 | do_data_augmentation=FLAGS.do_data_augmentation, 200 | use_mixed_precision=FLAGS.use_mixed_precision 201 | ) 202 | 203 | 204 | 205 | 206 | def _check_keras_dependencies(): 207 | """Checks dependencies of tf.keras.preprocessing.image are present. 208 | 209 | This function may come to depend on flag values that determine the kind 210 | of preprocessing being done. 211 | 212 | Raises: 213 | ImportError: If dependencies are missing. 214 | """ 215 | try: 216 | tf.keras.preprocessing.image.load_img(six.BytesIO()) 217 | except ImportError: 218 | print("\n*** Unsatisfied dependencies of keras_preprocessing.image. ***\n" 219 | "To install them, use your system's equivalent of\n" 220 | "pip install tensorflow_hub[make_image_classifier]\n") 221 | raise 222 | except Exception as e: # pylint: disable=broad-except 223 | # Loading from dummy content as above is expected to fail in other ways. 224 | pass 225 | 226 | 227 | def _assert_accuracy(train_result, assert_accuracy_at_least): 228 | # Fun fact: With TF1 behavior, the key was called "val_acc". 229 | val_accuracy = train_result.history["val_accuracy"][-1] 230 | accuracy_message = "found {:f}, expected at least {:f}".format( 231 | val_accuracy, assert_accuracy_at_least) 232 | if val_accuracy >= assert_accuracy_at_least: 233 | print("ACCURACY PASSED:", accuracy_message) 234 | else: 235 | raise AssertionError("ACCURACY FAILED:", accuracy_message) 236 | 237 | def main(args): 238 | """Main function to be called by absl.app.run() after flag parsing.""" 239 | del args 240 | 241 | #policy = mixed_precision.Policy('mixed_float16') 242 | #mixed_precision.set_policy(policy) 243 | 244 | #tf.config.gpu.set_per_process_memory_fraction(0.75) 245 | #tf.config.gpu.set_per_process_memory_growth(False) 246 | physical_devices = tf.config.list_physical_devices('GPU') 247 | try: 248 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 249 | print('Configured device') 250 | except Exception: 251 | # Invalid device or cannot modify virtual devices once initialized.
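# (Memory growth must be configured before the GPU is first initialized, and physical_devices may be empty on CPU-only machines, so a failure here is expected in some environments and safe to ignore.)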
252 | pass 253 | 254 | _check_keras_dependencies() 255 | hparams = _get_hparams_from_flags() 256 | 257 | image_dir = FLAGS.image_dir or lib.get_default_image_dir() 258 | 259 | model, labels, train_result, frozen_graph = lib.make_image_classifier( 260 | FLAGS.tfhub_module, image_dir, hparams, FLAGS.image_size, FLAGS.saved_model_dir) 261 | if FLAGS.assert_accuracy_at_least: 262 | _assert_accuracy(train_result, FLAGS.assert_accuracy_at_least) 263 | print("Done with training.") 264 | 265 | if FLAGS.labels_output_file: 266 | labels_dir_path = os.path.dirname(FLAGS.labels_output_file) 267 | # Ensure dir structure exists 268 | Path(labels_dir_path).mkdir(parents=True, exist_ok=True) 269 | with tf.io.gfile.GFile(FLAGS.labels_output_file, "w") as f: 270 | f.write("\n".join(labels + ("",))) 271 | print("Labels written to", FLAGS.labels_output_file) 272 | 273 | saved_model_dir = FLAGS.saved_model_dir 274 | 275 | if FLAGS.tflite_output_file and not saved_model_dir: 276 | # We need a SavedModel for conversion, even if the user did not request it. 277 | saved_model_dir = tempfile.mkdtemp() 278 | 279 | if saved_model_dir: 280 | # Ensure dir structure exists 281 | Path(saved_model_dir).mkdir(parents=True, exist_ok=True) 282 | tf.saved_model.save(model, saved_model_dir) 283 | keras_model_path = os.path.join(saved_model_dir, "saved_model.h5") 284 | weights_path = os.path.join(saved_model_dir, "saved_model_weights.h5") 285 | model.save(keras_model_path) 286 | model.save_weights(weights_path) 287 | print("SavedModel model exported to", saved_model_dir) 288 | 289 | if FLAGS.tflite_output_file: 290 | tflite_dir_path = os.path.dirname(FLAGS.tflite_output_file) 291 | # Ensure dir structure exists 292 | Path(tflite_dir_path).mkdir(parents=True, exist_ok=True) 293 | converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir) 294 | lite_model_content = converter.convert() 295 | with tf.io.gfile.GFile(FLAGS.tflite_output_file, "wb") as f: 296 | f.write(lite_model_content) 297 | print("TFLite model exported to", FLAGS.tflite_output_file) 298 | 299 | if saved_model_dir: 300 | # Save the frozen graph 301 | # Ensure dir structure exists 302 | Path(saved_model_dir).mkdir(parents=True, exist_ok=True) 303 | tf.io.write_graph(graph_or_graph_def=frozen_graph, 304 | logdir=saved_model_dir, 305 | name="frozen_graph.pb", 306 | as_text=False) 307 | 308 | 309 | def _ensure_tf2(): 310 | """Ensure running with TensorFlow 2 behavior. 311 | 312 | This function is safe to call even before flags have been parsed. 313 | 314 | Raises: 315 | ImportError: If tensorflow is too old for proper TF2 behavior. 316 | """ 317 | logging.info("Running with tensorflow %s (git version %s) and hub %s", 318 | tf.__version__, tf.__git_version__, hub.__version__) 319 | if tf.__version__.startswith("1."): 320 | if tf.__git_version__ == "unknown": # For internal testing use. 321 | try: 322 | tf.compat.v1.enable_v2_behavior() 323 | return 324 | except AttributeError: 325 | pass # Fail below for missing enabler function. 326 | raise ImportError("Sorry, this program needs TensorFlow 2.") 327 | 328 | 329 | def run_main(): 330 | """Entry point equivalent to executing this file.""" 331 | _ensure_tf2() 332 | app.run(main) 333 | 334 | 335 | if __name__ == "__main__": 336 | run_main() 337 | -------------------------------------------------------------------------------- /training/make_nsfw_model_lib.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Trains a TensorFlow model based on directories of images. 16 | 17 | This library provides the major pieces for make_image_classifier (see there). 18 | """ 19 | 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | from __future__ import unicode_literals 24 | import multiprocessing 25 | from pathlib import Path 26 | from absl import app 27 | from absl import flags 28 | from absl import logging 29 | from tensorflow import keras 30 | from tensorflow.keras.mixed_precision import experimental as mixed_precision 31 | from tensorflow.core.framework import attr_value_pb2 32 | from tensorflow.core.framework import graph_pb2 33 | from tensorflow.core.framework import node_def_pb2 34 | from tensorflow.keras import layers 35 | from tensorflow.python.framework import dtypes 36 | from tensorflow.python.framework import ops 37 | from tensorflow.python.framework import tensor_util 38 | from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2 39 | from tensorflow.python.platform import tf_logging as logging 40 | from tensorflow.python.util import deprecation 41 | from tensorflow.python.util.tf_export import tf_export 42 | import collections 43 | import copy 44 | import numpy as np 45 | import os 46 | import re 47 | import six 48 | import tempfile 49 | import tensorflow as tf 50 | import tensorflow.keras.backend as K 51 | import tensorflow_hub as hub 52 | 53 | 54 | _DEFAULT_IMAGE_URL = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz" 55 | 56 | # From https://github.com/tensorflow/hub/issues/390#issuecomment-544489095 57 | # Woops, this doesn't actually work. Sad face emoji. 58 | class Wrapper(tf.train.Checkpoint): 59 | def __init__(self, spec): 60 | super(Wrapper, self).__init__() 61 | self.module = hub.load(spec, tags=[]) 62 | self.variables = self.module.variables 63 | self.trainable_variables = [] 64 | def __call__(self, x): 65 | return self.module.signatures["default"](x)["default"] 66 | 67 | def get_default_image_dir(): 68 | """Returns the path to a default image dataset, downloading it if needed.""" 69 | return tf.keras.utils.get_file("flower_photos", 70 | _DEFAULT_IMAGE_URL, untar=True) 71 | 72 | def configure_optimizer(hparams): 73 | """Configures the optimizer used for training. 74 | 75 | Args: 76 | hparams: An HParams namedtuple; its optimizer-related fields select and configure the optimizer. 77 | 78 | Returns: 79 | An instance of an optimizer. 80 | 81 | Raises: 82 | ValueError: if hparams.optimizer is not recognized.
83 | """ 84 | if hparams.optimizer == 'adadelta': 85 | optimizer = tf.keras.optimizers.Adadelta( 86 | hparams.learning_rate, 87 | rho=hparams.adadelta_rho, 88 | epsilon=hparams.opt_epsilon) 89 | elif hparams.optimizer == 'adagrad': 90 | optimizer = tf.keras.optimizers.Adagrad( 91 | hparams.learning_rate, 92 | initial_accumulator_value=hparams.adagrad_initial_accumulator_value) 93 | elif hparams.optimizer == 'adam': 94 | optimizer = tf.keras.optimizers.Adam( 95 | hparams.learning_rate, 96 | beta_1=hparams.adam_beta1, 97 | beta_2=hparams.adam_beta2, 98 | epsilon=hparams.opt_epsilon) 99 | elif hparams.optimizer == 'ftrl': 100 | optimizer = tf.keras.optimizers.Ftrl( 101 | hparams.learning_rate, 102 | learning_rate_power=hparams.ftrl_learning_rate_power, 103 | initial_accumulator_value=hparams.ftrl_initial_accumulator_value, 104 | l1_regularization_strength=hparams.ftrl_l1, 105 | l2_regularization_strength=hparams.ftrl_l2) 106 | elif hparams.optimizer == 'rmsprop': 107 | optimizer = tf.keras.optimizers.RMSprop(learning_rate=hparams.learning_rate, epsilon=hparams.opt_epsilon, momentum=hparams.rmsprop_momentum) 108 | elif hparams.optimizer == 'sgd': 109 | optimizer = tf.keras.optimizers.SGD(learning_rate=hparams.learning_rate, momentum=hparams.momentum) 110 | else: 111 | raise ValueError('Optimizer [%s] was not recognized' % hparams.optimizer) 112 | return optimizer 113 | 114 | 115 | class HParams( 116 | collections.namedtuple("HParams", [ 117 | "train_epochs", "do_fine_tuning", "batch_size", "learning_rate", 118 | "momentum", "dropout_rate", "label_smoothing", "validation_split", 119 | "optimizer", "adadelta_rho", "adagrad_initial_accumulator_value", 120 | "adam_beta1", "adam_beta2", "opt_epsilon", "ftrl_learning_rate_power", 121 | "ftrl_initial_accumulator_value", "ftrl_l1", "ftrl_l2", "rmsprop_momentum", 122 | "rmsprop_decay", "do_data_augmentation", "use_mixed_precision" 123 | ])): 124 | """The hyperparameters for make_image_classifier. 125 | 126 | train_epochs: Training will do this many iterations over the dataset. 127 | do_fine_tuning: If true, the Hub module is trained together with the 128 | classification layer on top. 129 | batch_size: Each training step samples a batch of this many images. 130 | learning_rate: The learning rate to use for gradient descent training. 131 | momentum: The momentum parameter to use for gradient descent training. 132 | dropout_rate: The fraction of the input units to drop, used in dropout layer. 133 | """ 134 | 135 | 136 | def get_default_hparams(): 137 | """Returns a fresh HParams object initialized to default values.""" 138 | return HParams( 139 | train_epochs=5, 140 | do_fine_tuning=False, 141 | batch_size=32, 142 | learning_rate=0.005, 143 | momentum=0.9, 144 | dropout_rate=0.2, 145 | label_smoothing=0.1, 146 | validation_split=.20, 147 | optimizer='rmsprop', 148 | adadelta_rho=0.95, 149 | adagrad_initial_accumulator_value=0.1, 150 | adam_beta1=0.9, 151 | adam_beta2=0.999, 152 | opt_epsilon=1.0, 153 | ftrl_learning_rate_power=-0.5, 154 | ftrl_initial_accumulator_value=0.1, 155 | ftrl_l1=0.0, 156 | ftrl_l2=0.0, 157 | rmsprop_momentum=0.9, 158 | rmsprop_decay=0.9, 159 | do_data_augmentation=False, 160 | use_mixed_precision=False 161 | ) 162 | 163 | 164 | def _get_data_with_keras(image_dir, image_size, batch_size, 165 | validation_size=0.2, do_data_augmentation=False): 166 | """Gets training and validation data via keras_preprocessing. 
167 | 168 | Args: 169 | image_dir: A Python string with the name of a directory that contains 170 | subdirectories of images, one per class. 171 | image_size: A list or tuple with 2 Python integers specifying 172 | the fixed height and width to which input images are resized. 173 | batch_size: A Python integer with the number of images per batch. 174 | validation_size: A float with the fraction of the data to reserve for validation. 175 | do_data_augmentation: An optional boolean, controlling whether the 176 | training dataset is augmented by randomly distorting input images. 177 | 178 | Returns: 179 | A nested tuple ((train_data, train_size), 180 | (valid_data, valid_size), labels) where: 181 | train_data, valid_data: Generators for use with Model.fit_generator, 182 | each yielding tuples (images, labels) where 183 | images is a float32 Tensor of shape [batch_size, height, width, 3] 184 | with pixel values in range [0,1], 185 | labels is a float32 Tensor of shape [batch_size, num_classes] 186 | with one-hot encoded classes. 187 | train_size, valid_size: Python integers with the numbers of training 188 | and validation examples, respectively. 189 | labels: A tuple of strings with the class labels (subdirectory names). 190 | The index of a label in this tuple is the numeric class id. 191 | """ 192 | datagen_kwargs = dict(rescale=1./255, 193 | # TODO(b/139467904): Expose this as a flag. 194 | validation_split=validation_size) 195 | dataflow_kwargs = dict(target_size=image_size, batch_size=batch_size, 196 | interpolation="bilinear") 197 | 198 | valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator( 199 | **datagen_kwargs) 200 | valid_generator = valid_datagen.flow_from_directory( 201 | image_dir, subset="validation", shuffle=False, **dataflow_kwargs) 202 | 203 | if do_data_augmentation: 204 | # TODO(b/139467904): Expose the following constants as flags. 205 | train_datagen = tf.keras.preprocessing.image.ImageDataGenerator( 206 | rotation_range=40, horizontal_flip=True, width_shift_range=0.2, 207 | height_shift_range=0.2, shear_range=0.2, zoom_range=0.2, 208 | **datagen_kwargs) 209 | else: 210 | train_datagen = valid_datagen 211 | 212 | train_generator = train_datagen.flow_from_directory( 213 | image_dir, subset="training", shuffle=True, **dataflow_kwargs) 214 | 215 | indexed_labels = [(index, label) 216 | for label, index in train_generator.class_indices.items()] 217 | sorted_indices, sorted_labels = zip(*sorted(indexed_labels)) 218 | assert sorted_indices == tuple(range(len(sorted_labels))) 219 | return ((train_generator, train_generator.samples), 220 | (valid_generator, valid_generator.samples), 221 | sorted_labels) 222 | 223 | 224 | def _image_size_for_module(module_layer, requested_image_size=None): 225 | """Returns the input image size to use with the given module. 226 | 227 | Args: 228 | module_layer: A hub.KerasLayer initialized from a Hub module expecting 229 | image input. 230 | requested_image_size: An optional Python integer with the user-requested 231 | height and width of the input image; or None. 232 | 233 | Returns: 234 | A tuple (height, width) of Python integers that can be used as input 235 | image size for the given module_layer. 236 | 237 | Raises: 238 | ValueError: If requested_image_size is set but incompatible with the module. 239 | ValueError: If the module does not specify a particular input size and 240 | requested_image_size is not set. 241 | """ 242 | # TODO(b/139530454): Use a library helper function once available.
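# As a concrete illustration (an assumed value, not guaranteed by this code): for the default mobilenet_v2 feature-vector module, the introspection below yields module_image_size == (224, 224), in which case requested_image_size may simply be omitted.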
243 | # The stop-gap code below assumes any concrete function backing the 244 | # module call will accept a batch of images with the one accepted size. 245 | 246 | module_image_size = tuple( 247 | module_layer._func.__call__ # pylint:disable=protected-access 248 | .concrete_functions[0].structured_input_signature[0][0].shape[1:3]) 249 | 250 | if requested_image_size is None: 251 | if None in module_image_size: 252 | raise ValueError("Must specify an image size because " 253 | "the selected TF Hub module specifies none.") 254 | else: 255 | return module_image_size 256 | else: 257 | requested_image_size = tf.TensorShape([requested_image_size, requested_image_size]) 258 | assert requested_image_size.is_fully_defined() 259 | 260 | if requested_image_size.is_compatible_with(module_image_size): 261 | return tuple(requested_image_size.as_list()) 262 | else: 263 | raise ValueError("The selected TF Hub module expects image size {}, " 264 | "but size {} is requested".format( 265 | module_image_size, 266 | tuple(requested_image_size.as_list()))) 267 | 268 | 269 | def build_model(module_layer, hparams, image_size, num_classes): 270 | """Builds the full classifier model from the given module_layer. 271 | 272 | Args: 273 | module_layer: Pre-trained tfhub model layer. 274 | hparams: A namedtuple of hyperparameters. This function expects 275 | .dropout_rate: The fraction of the input units to drop, used in dropout 276 | layer. 277 | image_size: The input image size to use with the given module layer. 278 | num_classes: Number of the classes to be predicted. 279 | 280 | Returns: 281 | The full classifier model. 282 | """ 283 | # TODO(b/139467904): Expose the hyperparameters below as flags. 284 | 285 | if hparams.dropout_rate is not None and hparams.dropout_rate > 0: 286 | model = tf.keras.Sequential([ 287 | tf.keras.Input(shape=(image_size[0], image_size[1], 3), name='input', dtype='float32'), 288 | module_layer, 289 | tf.keras.layers.Dropout(rate=hparams.dropout_rate), 290 | tf.keras.layers.Dense( 291 | num_classes, 292 | kernel_regularizer=tf.keras.regularizers.l2(0.0001)), 293 | tf.keras.layers.Activation('softmax', dtype='float32', name='prediction') 294 | ]) 295 | else: 296 | model = tf.keras.Sequential([ 297 | tf.keras.Input(shape=(image_size[0], image_size[1], 3), name='input', dtype='float32'), 298 | module_layer, 299 | tf.keras.layers.Dense( 300 | num_classes, 301 | kernel_regularizer=None), 302 | tf.keras.layers.Activation('softmax', dtype='float32', name='prediction') 303 | ]) 304 | 305 | model.summary() # summary() prints directly; wrapping it in print() would also print "None" 306 | return model 307 | 308 | 309 | def train_model(model, hparams, train_data_and_size, valid_data_and_size): 310 | """Trains model with the given data and hyperparameters. 311 | 312 | Args: 313 | model: The tf.keras.Model from _build_model(). 314 | hparams: A namedtuple of hyperparameters. This function expects 315 | .train_epochs: a Python integer with the number of passes over the 316 | training dataset; 317 | .learning_rate: a Python float forwarded to the optimizer; 318 | .momentum: a Python float forwarded to the optimizer; 319 | .batch_size: a Python integer, the number of examples returned by each 320 | call to the generators. 321 | train_data_and_size: A (data, size) tuple in which data is training data to 322 | be fed in tf.keras.Model.fit(), size is a Python integer with the 323 | number of training examples. 
324 | valid_data_and_size: A (data, size) tuple in which data is validation data 325 | to be fed in tf.keras.Model.fit(), size is a Python integer with the 326 | number of validation examples. 327 | 328 | Returns: 329 | The tf.keras.callbacks.History object returned by tf.keras.Model.fit(). 330 | """ 331 | 332 | earlystop_callback = tf.keras.callbacks.EarlyStopping( 333 | monitor='val_accuracy', min_delta=0.0001, 334 | patience=1) 335 | 336 | train_data, train_size = train_data_and_size 337 | valid_data, valid_size = valid_data_and_size 338 | # TODO(b/139467904): Expose this hyperparameter as a flag. 339 | loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=hparams.label_smoothing) 340 | 341 | if hparams.use_mixed_precision is True: 342 | optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(configure_optimizer(hparams)) 343 | else: 344 | optimizer = configure_optimizer(hparams) 345 | 346 | model.compile( 347 | optimizer=optimizer, 348 | loss=loss, 349 | metrics=["accuracy"]) 350 | steps_per_epoch = train_size // hparams.batch_size 351 | validation_steps = valid_size // hparams.batch_size 352 | return model.fit( 353 | train_data, 354 | use_multiprocessing=False, 355 | workers=multiprocessing.cpu_count() - 1, 356 | epochs=hparams.train_epochs, 357 | callbacks=[earlystop_callback], 358 | steps_per_epoch=steps_per_epoch, 359 | validation_data=valid_data, 360 | validation_steps=validation_steps) 361 | 362 | def model_to_frozen_graph(model): 363 | 364 | # Convert the Keras model to a ConcreteFunction. 365 | # In the resulting graph, "self" will be the input node 366 | # and the very last softmax layer in the graph will be the 367 | # output prediction node. 368 | 369 | full_model = tf.function(model) 370 | full_model = full_model.get_concrete_function( 371 | tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)) 372 | 373 | # Get frozen ConcreteFunction 374 | frozen_func = convert_variables_to_constants_v2(full_model) 375 | input_graph = frozen_func.graph.as_graph_def() 376 | 377 | types_to_remove = {"CheckNumerics": True, "Identity": True} 378 | 379 | input_nodes = input_graph.node 380 | names_to_remove = {} 381 | 382 | # We're going to clean up some junk nodes that we do not 383 | # need outside of training. I assume these are inherited 384 | # from tensorflow hub. 385 | for node in input_nodes: 386 | if '/input_control_node/_'.upper() in node.name.upper(): 387 | names_to_remove[node.name] = True 388 | 389 | if '/output_control_node/_'.upper() in node.name.upper(): 390 | names_to_remove[node.name] = True 391 | 392 | # What we're doing here is double-iterating over the graph nodes 393 | # looking for disconnected/orphaned nodes. Any node whose name 394 | # cannot be found inside the inputs of another node is considered 395 | # trash that caused me pain and suffering for two days, so they're 396 | # going to be deleted. 397 | # 398 | # On a serious note, these are leftover junk (I assume) from the 399 | # tensorflow hub input that is not needed outside of training. 400 | for node in input_nodes: 401 | noOutput = True 402 | for inner in input_nodes: 403 | resa = [i for i in inner.input if node.name.upper() in i.upper()] 404 | if len(resa) > 0: 405 | noOutput = False 406 | 407 | if noOutput is True: 408 | names_to_remove[node.name] = True 409 | 410 | # We're going to look for junk nodes (used only in training) that are connected 411 | # to our output Softmax layer and mark those for deletion as well. 
412 | for node in input_nodes: 413 | if node.op in types_to_remove: 414 | 415 | # Find all nodes of type Identity that are connected to a Softmax (our output) 416 | found = [i for i in node.input if 'softmax'.upper() in i.upper()] 417 | 418 | if found is not None and len(found) > 0: 419 | names_to_remove[node.name] = True 420 | 421 | # The rest of this code is basically a straight-copy-and-paste from 422 | # the remove_nodes function of TF1. 423 | nodes_after_removal = [] 424 | for node in input_nodes: 425 | if node.name in names_to_remove: 426 | continue 427 | new_node = node_def_pb2.NodeDef() 428 | new_node.CopyFrom(node) 429 | input_before_removal = node.input 430 | del new_node.input[:] 431 | for full_input_name in input_before_removal: 432 | input_name = re.sub(r"^\^", "", full_input_name) 433 | if input_name in names_to_remove: 434 | continue 435 | new_node.input.append(full_input_name) 436 | nodes_after_removal.append(new_node) 437 | 438 | # TODO - We may be able to just delete all of this code here, as it 439 | # was unused by me and I was able to get a functional output. 440 | # When this TODO is tackled, just delete everything that has to do 441 | # with node splicing. In the final output, these nodes become either 442 | # Const or NoOp nodes anyway so they're junk, but harmless junk. 443 | types_to_splice = {"Identityzzz": True} 444 | control_input_names = set() 445 | node_names_with_control_input = set() 446 | for node in nodes_after_removal: 447 | for node_input in node.input: 448 | if "^" in node_input: 449 | control_input_names.add(node_input.replace("^", "")) 450 | node_names_with_control_input.add(node.name) 451 | protected_nodes = set()  # defined here so the splice check below cannot hit a NameError 452 | names_to_splice = {} 453 | for node in nodes_after_removal: 454 | if node.op in types_to_splice and node.name not in protected_nodes: 455 | # We don't want to remove nodes that have control edge inputs, because 456 | # they might be involved in subtle dependency issues that removing them 457 | # will jeopardize. 458 | if node.name not in node_names_with_control_input and len(node.input) > 0: 459 | names_to_splice[node.name] = node.input[0] 460 | 461 | # We also don't want to remove nodes which are used as control edge inputs. 462 | names_to_splice = {name: value for name, value in names_to_splice.items() 463 | if name not in control_input_names} 464 | 465 | nodes_after_splicing = [] 466 | for node in nodes_after_removal: 467 | if node.name in names_to_splice: 468 | continue 469 | new_node = node_def_pb2.NodeDef() 470 | new_node.CopyFrom(node) 471 | input_before_removal = node.input 472 | del new_node.input[:] 473 | for full_input_name in input_before_removal: 474 | input_name = re.sub(r"^\^", "", full_input_name) 475 | while input_name in names_to_splice: 476 | full_input_name = names_to_splice[input_name] 477 | input_name = re.sub(r"^\^", "", full_input_name) 478 | new_node.input.append(full_input_name) 479 | nodes_after_splicing.append(new_node) 480 | 481 | output_graph = graph_pb2.GraphDef() 482 | output_graph.node.extend(nodes_after_splicing) 483 | return output_graph 484 | 485 | def make_image_classifier(tfhub_module, image_dir, hparams, 486 | requested_image_size=None, saveModelDir=False): 487 | """Builds and trains a TensorFlow model for image classification. 488 | 489 | Args: 490 | tfhub_module: A Python string with the handle of the Hub module. 491 | image_dir: A Python string naming a directory with subdirectories of images, 492 | one per class. 493 | hparams: A HParams object with hyperparameters controlling the training. 
494 | requested_image_size: A Python integer controlling the size of images to 495 | feed into the Hub module. If the module has a fixed input size, this 496 | must be omitted or set to that same value. 497 | Returns: A tuple (model, labels, train_result, frozen_inference_graph). 498 | """ 499 | print("Using hparams:") 500 | for key, value in hparams._asdict().items(): 501 | print("\t{0} : {1}".format(key, value)) 502 | 503 | module_layer = hub.KerasLayer(tfhub_module, trainable=hparams.do_fine_tuning) 504 | 505 | image_size = _image_size_for_module(module_layer, requested_image_size) 506 | print("Using module {} with image size {}".format( 507 | tfhub_module, image_size)) 508 | train_data_and_size, valid_data_and_size, labels = _get_data_with_keras( 509 | image_dir, image_size, hparams.batch_size, hparams.validation_split, hparams.do_data_augmentation) 510 | print("Found", len(labels), "classes:", ", ".join(labels)) 511 | 512 | model = build_model(module_layer, hparams, image_size, len(labels)) 513 | 514 | # If we are fine-tuning, check and see if weights 515 | # already exist in the output directory. This way, a user 516 | # can simply run two consecutive training sessions: one without 517 | # fine-tuning, followed by another with it. 518 | if hparams.do_fine_tuning: 519 | if saveModelDir is not None: 520 | existingWeightsPath = os.path.join(saveModelDir, "saved_model_weights.h5") 521 | if os.path.exists(existingWeightsPath): 522 | print("Loading existing weights for fine-tuning") 523 | model.load_weights(existingWeightsPath) 524 | 525 | train_result = train_model(model, hparams, train_data_and_size, 526 | valid_data_and_size) 527 | 528 | # Tear down the model, set the learning phase to 0 (inference), and then re-create it. 529 | # 1 - Save model weights as Keras H5. 530 | 531 | tempDir = tempfile.gettempdir() 532 | tempModelWeightsFile = os.path.join(tempDir, "weights.h5") 533 | 534 | model.save_weights(tempModelWeightsFile) 535 | 536 | # 2 - Set training to 0 537 | 538 | K.clear_session() 539 | K.set_learning_phase(0) 540 | 541 | # 3 - Create model again 542 | 543 | model = build_model(module_layer, hparams, image_size, len(labels)) 544 | 545 | # 4 - Load model weights. 546 | 547 | model.load_weights(tempModelWeightsFile) 548 | 549 | # Clean up temp weights file 550 | os.remove(tempModelWeightsFile) 551 | 552 | # 5 - Pass model to lib.model_to_frozen_graph. 553 | frozen_inference_graph = model_to_frozen_graph(model) 554 | 555 | return model, labels, train_result, frozen_inference_graph 556 | -------------------------------------------------------------------------------- /training/train_all_models.cmd: -------------------------------------------------------------------------------- 1 | :: You can add more model types from here: https://tfhub.dev/s?module-type=image-classification&tf-version=tf2 2 | :: However, you must choose TensorFlow 2 models. V1 models will not work here. 3 | :: https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 4 | :: https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 5 | :: https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 6 | :: https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 7 | :: 8 | :: If you get a CUDA_OUT_OF_MEMORY crash, pass --batch_size NUMBER, reducing it until the error goes away (a reduced-batch example is sketched below). 9 | :: Google advises against using a batch size < 8. 10 | 11 | :: Note that we set all of our target epochs to over 9000. This is because the trainer just uses early stopping internally. 
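:: For example, a reduced-memory variant of the first run below might look like this (a sketch only; the paths and batch size are placeholders to adjust for your setup):
:: python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\mobilenet_v2_140_224 --labels_output_file %cd%\..\trained_models\mobilenet_v2_140_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --train_epochs 9001 --batch_size 8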
12 | 13 | :: Train Mobilenet V2 140 14 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\mobilenet_v2_140_224 --labels_output_file %cd%\..\trained_models\mobilenet_v2_140_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --tflite_output_file %cd%\..\trained_models\mobilenet_v2_140_224\saved_model.tflite --train_epochs 9001 --batch_size 32 --do_fine_tuning --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 15 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 16 | :: tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\mobilenet_v2_140_224 %cd%\..\trained_models\mobilenet_v2_140_224\web_model 17 | :: Or, for a quantized (1 byte) version 18 | :: tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\mobilenet_v2_140_224 %cd%\..\trained_models\mobilenet_v2_140_224\web_model_quantized --quantization_bytes 1 19 | 20 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 21 | Timeout /T 60 /Nobreak 22 | 23 | :: Train Resnet V2 50 24 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\resnet_v2_50_224 --labels_output_file %cd%\..\trained_models\resnet_v2_50_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 --tflite_output_file %cd%\..\trained_models\resnet_v2_50_224\saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 25 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 26 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\resnet_v2_50_224 %cd%\..\trained_models\resnet_v2_50_224\web_model 27 | :: Or, for a quantized (1 byte) version 28 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\resnet_v2_50_224 %cd%\..\trained_models\resnet_v2_50_224\web_model_quantized --quantization_bytes 1 29 | 30 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 
31 | Timeout /T 60 /Nobreak 32 | 33 | :: Train Inception V3 34 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\inception_v3_224 --labels_output_file %cd%\..\trained_models\inception_v3_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 --tflite_output_file %cd%\..\trained_models\inception_v3_224\saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 35 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 36 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\inception_v3_224 %cd%\..\trained_models\inception_v3_224\web_model 37 | :: Or, for a quantized (1 byte) version 38 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\inception_v3_224 %cd%\..\trained_models\inception_v3_224\web_model_quantized --quantization_bytes 1 39 | 40 | :: Wait for Python/CUDA/GPU to recover. Seems to die without this. 41 | Timeout /T 60 /Nobreak 42 | 43 | :: Train NasNetMobile 44 | python make_nsfw_model.py --image_dir %cd%\..\images --image_size 224 --saved_model_dir %cd%\..\trained_models\nasnet_a_224 --labels_output_file %cd%\..\trained_models\nasnet_a_224\class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 --tflite_output_file %cd%\..\trained_models\nasnet_a_224\saved_model.tflite --train_epochs 9001 --batch_size 24 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 45 | :: Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 46 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\nasnet_a_224 %cd%\..\trained_models\nasnet_a_224\web_model 47 | :: Or, for a quantized (1 byte) version 48 | ::tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve %cd%\..\trained_models\nasnet_a_224 %cd%\..\trained_models\nasnet_a_224\web_model_quantized --quantization_bytes 1 -------------------------------------------------------------------------------- /training/train_all_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # You can add more model types from here: https://tfhub.dev/s?module-type=image-classification&tf-version=tf2 3 | # However, you must choose TensorFlow 2 models. V1 models will not work here. 
4 | # https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 5 | # https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 6 | # https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 7 | # https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 8 | # 9 | # If you get a CUDA_OUT_OF_MEMORY crash, pass --batch_size NUMBER, reducing it until the error goes away. 10 | # Google advises against using a batch size < 8. 11 | 12 | # Note that we set all of our target epochs to over 9000. This is because the trainer just uses early stopping internally. 13 | 14 | # Train Mobilenet V2 140 15 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/mobilenet_v2_140_224 --labels_output_file $PWD/../trained_models/mobilenet_v2_140_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4 --tflite_output_file $PWD/../trained_models/mobilenet_v2_140_224/saved_model.tflite --train_epochs 9001 --batch_size 32 --do_fine_tuning --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 16 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 17 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/mobilenet_v2_140_224 $PWD/../trained_models/mobilenet_v2_140_224/web_model 18 | # Or, for a quantized (1 byte) version 19 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/mobilenet_v2_140_224 $PWD/../trained_models/mobilenet_v2_140_224/web_model_quantized --quantization_bytes 1 20 | 21 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 22 | sleep 60 23 | 24 | # Train Resnet V2 50 25 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/resnet_v2_50_224 --labels_output_file $PWD/../trained_models/resnet_v2_50_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/4 --tflite_output_file $PWD/../trained_models/resnet_v2_50_224/saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 26 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 27 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/resnet_v2_50_224 $PWD/../trained_models/resnet_v2_50_224/web_model 28 | # Or, for a quantized (1 byte) version 29 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/resnet_v2_50_224 $PWD/../trained_models/resnet_v2_50_224/web_model_quantized --quantization_bytes 1 30 | 31 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 
32 | sleep 60 33 | 34 | # Train Inception V3 35 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/inception_v3_224 --labels_output_file $PWD/../trained_models/inception_v3_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/inception_v3/feature_vector/4 --tflite_output_file $PWD/../trained_models/inception_v3_224/saved_model.tflite --train_epochs 9001 --batch_size 16 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 36 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 37 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/inception_v3_224 $PWD/../trained_models/inception_v3_224/web_model 38 | # Or, for a quantized (1 byte) version 39 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/inception_v3_224 $PWD/../trained_models/inception_v3_224/web_model_quantized --quantization_bytes 1 40 | 41 | # Wait for Python/CUDA/GPU to recover. Seems to die without this. 42 | sleep 60 43 | 44 | # Train NasNetMobile 45 | python make_nsfw_model.py --image_dir $PWD/../images --image_size 224 --saved_model_dir $PWD/../trained_models/nasnet_a_224 --labels_output_file $PWD/../trained_models/nasnet_a_224/class_labels.txt --tfhub_module https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4 --tflite_output_file $PWD/../trained_models/nasnet_a_224/saved_model.tflite --train_epochs 9001 --batch_size 24 --do_fine_tuning --learning_rate 0.001 --dropout_rate 0.0 --label_smoothing=0.0 --validation_split=0.1 --do_data_augmentation=True --use_mixed_precision=True --rmsprop_momentum=0.0 46 | # Note that installing tensorflowjs also installs tensorflow-cpu A.K.A. bye-bye-training. So make sure you perform this step after all your training is done, and then restore a GPU version of TF. 47 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/nasnet_a_224 $PWD/../trained_models/nasnet_a_224/web_model 48 | # Or, for a quantized (1 byte) version 49 | # tensorflowjs_converter --input_format=tf_saved_model --output_format=tfjs_graph_model --signature_name=serving_default --saved_model_tags=serve $PWD/../trained_models/nasnet_a_224 $PWD/../trained_models/nasnet_a_224/web_model_quantized --quantization_bytes 1 --------------------------------------------------------------------------------