├── .dvc ├── .gitignore └── config ├── .gitignore ├── README.md ├── requirements.txt └── train.py /.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | state 2 | lock 3 | config.local 4 | updater 5 | cache 6 | /lock 7 | /config.local 8 | /updater 9 | /updater.lock 10 | /state-journal 11 | /state-wal 12 | /state 13 | /cache 14 | /tmp 15 | -------------------------------------------------------------------------------- /.dvc/config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iterative/example-versioning/3f6ec3ad3af6141e5b171322ff04748945337fe2/.dvc/config -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Example: Versioning 2 | 3 | Datasets and ML model getting started 4 | [versioning tutorial](https://dvc.org/doc/tutorials/versioning). 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pillow 2 | scipy 3 | tensorflow>=2,<3 4 | tqdm>=4.41.0,<5 5 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | '''This script goes along the blog post 2 | "Building powerful image classification models using very little data" 3 | from blog.keras.io. 
4 | 5 | In our example we will be using data that can be downloaded at: 6 | https://www.kaggle.com/tongpython/cat-and-dog 7 | 8 | In our setup, it expects: 9 | - a data/ folder 10 | - train/ and validation/ subfolders inside data/ 11 | - cats/ and dogs/ subfolders inside train/ and validation/ 12 | - put the cat pictures index 0-X in data/train/cats 13 | - put the cat pictures index 1000-1400 in data/validation/cats 14 | - put the dogs pictures index 0-X in data/train/dogs 15 | - put the dog pictures index 1000-1400 in data/validation/dogs 16 | 17 | We have X training examples for each class, and 400 validation examples 18 | for each class. In summary, this is our directory structure: 19 | ``` 20 | data/ 21 | train/ 22 | dogs/ 23 | dog001.jpg 24 | dog002.jpg 25 | ... 26 | cats/ 27 | cat001.jpg 28 | cat002.jpg 29 | ... 30 | validation/ 31 | dogs/ 32 | dog001.jpg 33 | dog002.jpg 34 | ... 35 | cats/ 36 | cat001.jpg 37 | cat002.jpg 38 | ... 39 | ``` 40 | ''' 41 | import numpy as np 42 | import sys 43 | import os 44 | 45 | from tensorflow.keras.preprocessing.image import ImageDataGenerator 46 | from tensorflow.keras.models import Sequential 47 | from tensorflow.keras.layers import Dropout, Flatten, Dense 48 | from tensorflow.keras import applications 49 | from tensorflow.keras.callbacks import CSVLogger 50 | from tqdm.keras import TqdmCallback 51 | 52 | pathname = os.path.dirname(sys.argv[0]) 53 | path = os.path.abspath(pathname) 54 | 55 | # dimensions of our images. 
# Dimensions to which every input image is resized before entering VGG16.
img_width, img_height = 150, 150

top_model_weights_path = 'model.weights.h5'
train_data_dir = os.path.join('data', 'train')
validation_data_dir = os.path.join('data', 'validation')

# The dataset is balanced (same number of cat and dog images), so counting
# the cat training files and doubling gives the total training-set size.
cats_train_path = os.path.join(path, train_data_dir, 'cats')
nb_train_samples = 2 * len([name for name in os.listdir(cats_train_path)
                            if os.path.isfile(
                                os.path.join(cats_train_path, name))])
nb_validation_samples = 800
epochs = 10
batch_size = 10


def save_bottlebeck_features():
    """Extract and save VGG16 bottleneck features for both data splits.

    Runs every training and validation image once through a frozen VGG16
    convolutional base (ImageNet weights, no classifier head) and writes
    the resulting feature arrays to ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``.

    NOTE(review): the historical typo in the name ("bottlebeck") is kept
    so any external caller of this module keeps working.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # Convolutional base only; the dense classifier is trained separately
    # in train_top_model().
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,   # no labels needed: we only extract features
        shuffle=False)     # preserve order so labels can be rebuilt later
    # BUG FIX: Model.predict's second positional argument is `batch_size`,
    # not `steps`; the step count must be passed by keyword.
    bottleneck_features_train = model.predict(
        generator, steps=nb_train_samples // batch_size)
    # Pass the filename so NumPy opens and closes the file itself
    # (the previous explicit open(...) handle was never closed).
    np.save('bottleneck_features_train.npy', bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    bottleneck_features_validation = model.predict(
        generator, steps=nb_validation_samples // batch_size)
    np.save('bottleneck_features_validation.npy',
            bottleneck_features_validation)


def train_top_model():
    """Train the small dense classifier on the saved bottleneck features.

    Loads the feature arrays written by save_bottlebeck_features(), fits a
    Flatten/Dense(256)/Dropout/Dense(1, sigmoid) head with binary
    cross-entropy, logs metrics to ``metrics.csv``, and saves the weights
    to ``top_model_weights_path``.
    """
    train_data = np.load('bottleneck_features_train.npy')
    # flow_from_directory walks class folders alphabetically with
    # shuffle=False, so the first half of each array is cats (label 0)
    # and the second half is dogs (label 1).
    train_labels = np.array(
        [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = np.array(
        [0] * (nb_validation_samples // 2) +
        [1] * (nb_validation_samples // 2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels),
              verbose=0,   # progress is shown by TqdmCallback instead
              callbacks=[TqdmCallback(), CSVLogger("metrics.csv")])
    model.save_weights(top_model_weights_path)


save_bottlebeck_features()
train_top_model()