├── examples
    ├── resnetv2
    │   ├── requirements.txt
    │   ├── deepkit.yml
    │   └── model.py
    ├── keras-cifar10
    │   ├── requirements.txt
    │   ├── local.deepkit.yml
    │   ├── deepkit.yml
    │   └── model.py
    ├── tf2-keras-mnist
    │   ├── requirements.txt
    │   ├── deepkit.yml
    │   └── model.py
    ├── generic
    │   ├── deepkit.yml
    │   └── train.py
    ├── torch
    │   ├── local.deepkit.yml
    │   ├── resnet.py
    │   └── train.py
    ├── alot
    │   └── train.py
    ├── dynamic-experiments
    │   ├── sub-experiments.py
    │   └── threaded.py
    ├── ray
    │   └── dqn.py
    └── alexnet
    │   └── model.py
├── setup.cfg
├── README.md
├── deepkit
    ├── globals.py
    ├── home.py
    ├── utils
    │   ├── __init__.py
    │   ├── pilutil.py
    │   └── image.py
    ├── model.py
    ├── __init__.py
    ├── debugger.py
    ├── deepkit_keras.py
    ├── pytorch_graph.py
    ├── pytorch.py
    ├── client.py
    ├── keras_tf.py
    └── experiment.py
├── Makefile
├── tests
    └── test_home.py
└── setup.py


/examples/resnetv2/requirements.txt:
--------------------------------------------------------------------------------
1 | keras>2.0.0


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md


--------------------------------------------------------------------------------
/examples/keras-cifar10/requirements.txt:
--------------------------------------------------------------------------------
1 | keras==2.3.0
2 | 


--------------------------------------------------------------------------------
/examples/tf2-keras-mnist/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow>=2.0


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deepkit Python SDK
2 | 
3 | This is the Python SDK for Deepkit.
4 | 
5 | [View documentation](https://deepkit.ai/documentation/python-sdk/getting-started)


--------------------------------------------------------------------------------
/deepkit/globals.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | from typing import Optional
 3 | 
 4 | from deepkit.experiment import Experiment
 5 | 
 6 | last_experiment: Optional[Experiment] = None
 7 | 
 8 | loaded_job_config = None
 9 | 
10 | last_logs = io.StringIO('')
11 | 


--------------------------------------------------------------------------------
/examples/generic/deepkit.yml:
--------------------------------------------------------------------------------
1 | label: Generic data generation
2 | image: python:3.7
3 | build:
4 |     - ADD ../../deepkit:/deepkit-sdk/deepkit
5 |     - ADD ../../setup.py:/deepkit-sdk/setup.py
6 |     - pip install -e /deepkit-sdk/
7 | 
8 | command: python train.py
9 | 


--------------------------------------------------------------------------------
/examples/torch/local.deepkit.yml:
--------------------------------------------------------------------------------
 1 | config:
 2 |   batch_size: 32
 3 | 
 4 | ignore:
 5 |   - data
 6 |   - checkpoint
 7 |   - runs
 8 | 
 9 | env:
10 |   - PYTHONPATH=/Users/marc/bude/deepkit-python-sdk/
11 | 
12 | command: /usr/local/Cellar/python/3.7.6_1/bin/python3 train.py


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | zip:
2 | 	rm -f deepkit.zip
3 | 	zip deepkit.zip deepkit/*.py deepkit/utils/*.py README.md setup.cfg setup.py
4 | 
5 | publish:
6 | 	rm -rf dist/*
7 | 	python3 setup.py sdist bdist_wheel
8 | 	python3 -m twine upload --repository-url https://upload.pypi.org/legacy/ dist/*


--------------------------------------------------------------------------------
/examples/keras-cifar10/local.deepkit.yml:
--------------------------------------------------------------------------------
 1 | label: TF1 Keras Cifar10 Partial Host/Local
 2 | 
 3 | ignore:
 4 |     - 'report.*'
 5 |     - saved_models
 6 | 
 7 | output: saved_models
 8 | 
 9 | config:
10 |     lr: 0.8
11 |     batch_size: 128
12 |     epochs: 15
13 |     train_samples: 60000
14 |     test_samples: 10000
15 | 
16 | command: /usr/local/Cellar/python/3.7.6_1/bin/python3 model.py


--------------------------------------------------------------------------------
/deepkit/home.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | import typedload
 5 | 
 6 | from deepkit.model import HomeConfig
 7 | 
 8 | 
 9 | def get_home_config() -> HomeConfig:
10 |     path = os.path.expanduser('~') + '/.deepkit/config'
11 |     if not os.path.exists(path):
12 |         raise Exception("No ~/.deepkit/config file found")
13 | 
14 |     with open(path, 'r') as h:
15 |         return typedload.load(json.load(h), HomeConfig)


--------------------------------------------------------------------------------
/examples/alot/train.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import deepkit
 3 | 
 4 | experiment = deepkit.experiment()
 5 | experiment.add_file(__file__)
 6 | 
 7 | test = experiment.define_metric('test')
 8 | 
 9 | for i in range(10):
10 |     experiment.set_info(i, random.random())
11 | 
12 | total = 100_000
13 | 
14 | for i in range(total):
15 |     test.send(i, random.gauss(25, 25/3))
16 |     experiment.epoch(i, total)
17 | 
18 | print("Bye.")
19 | 


--------------------------------------------------------------------------------
/examples/dynamic-experiments/sub-experiments.py:
--------------------------------------------------------------------------------
 1 | from time import sleep
 2 | 
 3 | import deepkit
 4 | 
 5 | experiment = deepkit.experiment(project='sub-experiments')
 6 | print('root job', experiment.id)
 7 | 
 8 | experiments = 10
 9 | 
10 | for i in range(experiments):
11 |     sub_experiment = experiment.create_sub_experiment()
12 |     print('sub job', sub_experiment.id)
13 | 
14 |     sub_experiment.done()
15 | 
16 | sleep(5)
17 | 


--------------------------------------------------------------------------------
/examples/generic/train.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import deepkit
 3 | from time import sleep
 4 | 
 5 | experiment = deepkit.experiment()
 6 | experiment.add_file(__file__)
 7 | 
 8 | test = experiment.define_metric('test')
 9 | 
10 | for i in range(10):
11 |     experiment.set_info(str(i), random.random())
12 | 
13 | total = 1_000
14 | 
15 | for i in range(total):
16 |     test.send(random.gauss(25, 25/3), x=i)
17 |     experiment.epoch(i, total)
18 |     sleep(0.005)
19 | 
20 | print("Bye.")
21 | 


--------------------------------------------------------------------------------
/examples/tf2-keras-mnist/deepkit.yml:
--------------------------------------------------------------------------------
 1 | title: TF2 Keras Fashion Mnist
 2 | image: tensorflow/tensorflow:2.0.1-gpu-py3
 3 | build:
 4 |     - pip install --upgrade pip
 5 |     - python -c 'from tensorflow.keras import datasets; datasets.fashion_mnist.load_data()'
 6 |     - ADD ../../deepkit:/deepkit-sdk/deepkit
 7 |     - ADD ../../setup.py:/deepkit-sdk/setup.py
 8 |     - pip install -e /deepkit-sdk/
 9 | 
10 | ignore:
11 |   - logs
12 | 
13 | #resources:
14 | #    minCpu: 2
15 | #    minMemory: 2
16 | 
17 | command: python model.py
18 | 


--------------------------------------------------------------------------------
/examples/resnetv2/deepkit.yml:
--------------------------------------------------------------------------------
 1 | title: Keras TF1 Resnet Cifar10
 2 | label: 'keras'
 3 | 
 4 | image: tensorflow/tensorflow:1.15.2-gpu-py3
 5 | build:
 6 |     - pip install --upgrade pip
 7 |     - ADD requirements.txt
 8 |     - pip install -r requirements.txt
 9 |     - python -c 'from keras.datasets import cifar10; cifar10.load_data()'
10 |     - ADD ../../deepkit:/deepkit-sdk/deepkit
11 |     - ADD ../../setup.py:/deepkit-sdk/setup.py
12 |     - pip install -e /deepkit-sdk/
13 | 
14 | ignore:
15 |     - report.*
16 |     - saved_models
17 | 
18 | output: saved_models
19 | 
20 | command: python model.py
21 | 


--------------------------------------------------------------------------------
/tests/test_home.py:
--------------------------------------------------------------------------------
 1 | import typedload
 2 | 
 3 | from deepkit.model import HomeConfig
 4 | 
 5 | 
 6 | def test_home_config_convert():
 7 |     config = typedload.load({
 8 |         'accounts': [
 9 |             {'id': '1', 'name': 'peter', 'port': 8080, 'ssl': False, 'username': '', 'host': 'deepkit.ai', 'token': 'abc'},
10 |             {'id': '2', 'name': 'localhost', 'port': 8080, 'ssl': False, 'username': '', 'host': 'deepkit.ai', 'token': 'abc'}
11 |         ],
12 |         'folderLinks': []
13 |     }, HomeConfig)
14 | 
15 |     assert config.get_account_for_id('1').name == 'peter'
16 |     assert config.get_account_for_id('2').name == 'localhost'
17 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from setuptools import find_packages
 3 | __version__ = '1.0.9'
 4 | 
 5 | setup(name='deepkit',
 6 |       version=__version__,
 7 |       description='Python SDK for Deepkit',
 8 |       author='Marc J. Schmidt',
 9 |       author_email='marc@marcjschmidt.de',
10 |       url='https://github.com/deepkit/deepkit-python-sdk',
11 |       download_url='https://github.com/deepkit/deepkit-python-sdk/tarball/' + __version__,
12 |       license='MIT',
13 |       packages=find_packages(),
14 |       install_requires=[
15 |           'numpy',
16 |           'Pillow>=4.0.0',
17 |           'rx>=1.5',
18 |           'typedload>=1.20',
19 |           'PyYAML>=5.0.0',
20 |           'psutil>=5.7.0',
21 |           'websockets>=8.1'
22 |       ],
23 |       extras_require={
24 |           'pytorch': ["torch"]
25 |       })
26 | 


--------------------------------------------------------------------------------
/examples/keras-cifar10/deepkit.yml:
--------------------------------------------------------------------------------
 1 | title: TF1 Keras Cifar10 Partial
 2 | list: keras-cifar10
 3 | label: 'keras'
 4 | 
 5 | image: tensorflow/tensorflow:1.15.2-gpu-py3
 6 | build:
 7 |     - pip install --upgrade pip
 8 |     - ADD requirements.txt
 9 |     - pip install -r requirements.txt
10 |     - python -c 'from keras.datasets import cifar10; cifar10.load_data()'
11 |     - ADD ../../deepkit:/deepkit-sdk/deepkit
12 |     - ADD ../../setup.py:/deepkit-sdk/setup.py
13 |     - pip install -e /deepkit-sdk/
14 | 
15 | ignore:
16 |     - report.*
17 |     - saved_models
18 | 
19 | output: saved_models
20 | 
21 | resources:
22 |     minCpu: 2
23 |     minMemory: 2
24 |     minGpu: 1
25 | 
26 | config:
27 |     lr: 0.8
28 |     batch_size: 128
29 |     epochs: 25
30 |     train_samples: 60000
31 |     test_samples: 10000
32 |     data_augmentation: false
33 | 
34 | command: python model.py


--------------------------------------------------------------------------------
/examples/ray/dqn.py:
--------------------------------------------------------------------------------
 1 | import deepkit
 2 | import ray
 3 | from ray.rllib.agents import dqn
 4 | 
 5 | # note: ray overwrites sys.path[0], Dunno why, but that breaks deepkit looking for the project link
 6 | experiment = deepkit.experiment(account='localhost', project='deepkit-python-sdk')
 7 | 
 8 | # Initialize Ray with host that makes docker happy
 9 | ray.init(webui_host='127.0.0.1')
10 | 
11 | # Initialize DQN Trainer with default config and built-in gym cart-pole environment.
12 | trainer = dqn.DQNTrainer(config=dqn.DEFAULT_CONFIG, env="CartPole-v0")
13 | 
14 | # Extract several layers of models
15 | ray_policy = trainer.get_policy()
16 | ray_model = ray_policy.model
17 | # This is the one I think we should "watch"
18 | keras_model = ray_model.base_model
19 | 
20 | experiment.watch_keras_model(keras_model)
21 | 
22 | experiment.log('lets go')
23 | 
24 | # Manually train for a couple of iterations
25 | for i in range(20):
26 |     result = trainer.train()
27 | 
28 | experiment.log('Done')
29 | 


--------------------------------------------------------------------------------
/deepkit/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import os
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | def in_self_execution():
 8 |     """
 9 |     Returns true if the script is directly executed without `deepkit` cli.
10 |     """
11 |     return 'DEEPKIT_JOB_ACCESSTOKEN' not in os.environ
12 | 
13 | 
14 | def array_to_img(x, scale=True):
15 |     """
16 |     x should be shape (channels, width, height)
17 |     """
18 |     from PIL import Image
19 |     if x.ndim != 3:
20 |         raise Exception('Unsupported shape : ', str(x.shape), '. Need (channels, width, height)')
21 |     if scale:
22 |         x += max(-np.min(x), 0)
23 |         x /= np.max(x)
24 |         x *= 255
25 |     if x.shape[0] == 3:
26 |         # RGB
27 |         if x.dtype != 'uint8':
28 |             x = x.astype('uint8')
29 |         return Image.fromarray(x.astype('uint8'), 'RGB')
30 |     elif x.shape[0] == 1:
31 |         # grayscale
32 |         if x.dtype != 'uint8':
33 |             x = x.astype('uint8')
34 |         return Image.fromarray(x.reshape(x.shape[1], x.shape[2]), 'L')
35 |     else:
36 |         raise Exception('Unsupported channel number: ', x.shape[0])
37 | 
38 | 
39 | def numpy_to_binary(array):
40 |     buffer = io.BytesIO()
41 | 
42 |     if isinstance(array, np.ndarray):
43 |         np.save(buffer, array)
44 | 
45 |     return buffer.getvalue()
46 | 
47 | 
48 | def get_parameter_by_path(dictionary, path):
49 |     if not dictionary:
50 |         return None
51 | 
52 |     if path in dictionary:
53 |         return dictionary[path]
54 | 
55 |     current = dictionary
56 | 
57 |     for item in path.split('.'):
58 |         if item not in current:
59 |             return None
60 | 
61 |         current = current[item]
62 | 
63 |     return current
64 | 


--------------------------------------------------------------------------------
/examples/dynamic-experiments/threaded.py:
--------------------------------------------------------------------------------
 1 | # this script starts multiple experiments
 2 | import random
 3 | import threading
 4 | from time import sleep
 5 | 
 6 | import deepkit
 7 | 
 8 | experiment_optimization_id = '1'
 9 | 
10 | hyper_parameters_base = {
11 |     'lr': 0.1,
12 |     'optimizer': 'adam',
13 | }
14 | 
15 | root_experiment = deepkit.experiment(project='threaded')
16 | experiments = 10
17 | 
18 | 
19 | class ExperimentExecutor(threading.Thread):
20 |     def __init__(self, id: int, root_experiment: deepkit.Experiment, hyper_parameters: dict):
21 |         super().__init__()
22 |         self.daemon = True
23 |         self.id = id
24 |         self.root_experiment = root_experiment
25 |         self.hyper_parameters = hyper_parameters
26 | 
27 |     def run(self):
28 |         experiment = self.root_experiment.create_sub_experiment()
29 |         experiment.set_info('id', id)
30 |         experiment.set_info('optimization_id', experiment_optimization_id)
31 |         experiment.set_full_config(hyper_parameters)
32 |         experiment.add_file(__file__)
33 | 
34 |         total = 1_000
35 |         for epoch in range(total):
36 |             experiment.log_metric('test', random.gauss(25, 25 / 3), x=epoch)
37 |             experiment.epoch(epoch + 1, total)
38 |             sleep(0.05)
39 | 
40 |         if self.id == 2:
41 |             experiment.set_description('Aborted on purpose')
42 |             experiment.abort()
43 |         else:
44 |             experiment.done()
45 | 
46 |         print(f"Experiment #{self.id} ended.")
47 | 
48 | 
49 | threads = []
50 | for i in range(experiments):
51 |     hyper_parameters = hyper_parameters_base.copy()
52 |     hyper_parameters['lr'] += i * 0.1  # poor man's hyper-parameter optimization :o)
53 | 
54 |     executor = ExperimentExecutor(i, root_experiment, hyper_parameters)
55 |     executor.start()
56 |     threads.append(executor)
57 | 
58 | for executor in threads:
59 |     executor.join()
60 | 
61 | print("All done")
62 | 


--------------------------------------------------------------------------------
/examples/alexnet/model.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import keras
 4 | 
 5 | keras.backend.set_image_data_format('channels_first')
 6 | from keras.models import Sequential
 7 | from keras.layers import *
 8 | from keras.utils import np_utils
 9 | import numpy as np
10 | import deepkit
11 | 
12 | experiment = deepkit.experiment()
13 | 
14 | batch_size = 64
15 | nb_classes = 1000
16 | 
17 | img_rows, img_cols = 224, 224
18 | 
19 | if keras.backend.image_data_format() == 'channels_first':
20 |     X_train = np.random.random((batch_size, 3, img_rows, img_cols)).astype('float32')
21 | else:
22 |     X_train = np.random.random((batch_size, img_rows, img_cols, 3)).astype('float32')
23 | Y_train = np.random.random((batch_size,)).astype('int32')
24 | Y_train = np_utils.to_categorical(Y_train, nb_classes)
25 | 
26 | 
27 | def gen():
28 |     while True:
29 |         yield (X_train, Y_train)
30 | 
31 | 
32 | model = Sequential()
33 | model.add(Convolution2D(64, 11, strides=4, padding='valid', input_shape=X_train.shape[1:]))
34 | model.add(Activation('relu'))
35 | model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
36 | model.add(Convolution2D(192, 5, padding='same'))
37 | model.add(Activation('relu'))
38 | model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
39 | 
40 | model.add(Convolution2D(384, 3, padding='same'))
41 | model.add(Activation('relu'))
42 | model.add(Convolution2D(256, 3, padding='same'))
43 | model.add(Activation('relu'))
44 | model.add(Convolution2D(256, 3, padding='same'))
45 | model.add(Activation('relu'))
46 | model.add(MaxPooling2D(pool_size=(3, 3), strides=2))
47 | 
48 | model.add(Flatten())
49 | model.add(Dense(4096))
50 | model.add(Activation('relu'))
51 | model.add(Dropout(0.5))
52 | model.add(Dense(4096))
53 | model.add(Activation('relu'))
54 | model.add(Dropout(0.5))
55 | model.add(Dense(nb_classes))
56 | model.add(Activation('softmax'))
57 | 
58 | for l in model.layers:
59 |     print(l.input_shape, l.output_shape)
60 | 
61 | # Let's train the model using RMSprop
62 | model.compile(loss='categorical_crossentropy',
63 |               optimizer='rmsprop',
64 |               metrics=['accuracy'])
65 | 
66 | model.fit_generator(gen(), epochs=100, steps_per_epoch=200, callbacks=[experiment.create_keras_callback(next(gen())[0])])
67 | 


--------------------------------------------------------------------------------
/deepkit/model.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import NamedTuple, Optional, List
 3 | 
 4 | 
 5 | class ExperimentOptions(NamedTuple):
 6 |     """
 7 |     Per default the account linked to this folder is used (see `deepkit link`), this is on a new system `localhost`. 
 8 |     You can overwrite which account is used by specifying the name here (see `deepkit id` for 
 9 |     available accounts in your system). 
10 |     """
11 |     account: Optional[str] = None
12 | 
13 |     """
14 |     Per default the project linked to this folder is used (see `deepkit link`). 
15 |     You can overwrite which proect is used. 
16 |     Names is format of either `my-project`, or `user/my-project`, or `org/my-project`.
17 |     
18 |     If the current folder is not linked and you don't specify a project here, an error is raised since
19 |     Deepkit isn't able to know to which project the experiments data should be sent.
20 |     """
21 |     project: Optional[str] = None
22 | 
23 | 
24 | class Account(NamedTuple):
25 |     id: str
26 |     port: int
27 |     ssl: bool
28 |     username: str
29 |     token: str
30 |     host: str
31 |     name: str
32 | 
33 | 
34 | class FolderLink(NamedTuple):
35 |     accountId: str
36 |     name: str
37 |     path: str
38 |     projectId: str
39 | 
40 | 
41 | class HomeConfig(NamedTuple):
42 |     accounts: List[Account]
43 |     folderLinks: List[FolderLink]
44 | 
45 |     def get_first_account(self) -> Account:
46 |         if len(self.accounts) is 0: raise Exception(f'No Deepkit accounts configured.')
47 |         return self.accounts[0]
48 | 
49 |     def get_account_for_name(self, name: str) -> Account:
50 |         for account in self.accounts:
51 |             if account.name == name:
52 |                 return account
53 |         raise Exception(f'No account for name {name} configured. Use `deepkit login` to add new accounts.')
54 | 
55 |     def get_account_for_id(self, id: str) -> Account:
56 |         for account in self.accounts:
57 |             if account.id == id:
58 |                 return account
59 |         raise Exception(f'No account for id {id}')
60 | 
61 |     def get_folder_link_of_directory(self, dir: str) -> FolderLink:
62 |         link_map = {}
63 |         for item in self.folderLinks:
64 |             link_map[item.path] = item
65 | 
66 |         while dir not in link_map:
67 |             dir = os.path.realpath(os.path.join(dir, '..'))
68 |             if dir == os.path.realpath(os.path.join(dir, '..')):
69 |                 # reached root
70 |                 break
71 | 
72 |         if dir in link_map:
73 |             return link_map[dir]
74 | 
75 |         raise Exception('No project linked for folder ' + dir)
76 | 


--------------------------------------------------------------------------------
/examples/tf2-keras-mnist/model.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | import tensorflow as tf
 4 | from tensorflow.keras import Model, layers, optimizers, datasets
 5 | import deepkit
 6 | 
 7 | experiment = deepkit.experiment()
 8 | experiment.add_file('model.py')
 9 | 
10 | (x, y), (x_val, y_val) = datasets.fashion_mnist.load_data()
11 | x = x.reshape(x.shape[0], 28, 28, 1)
12 | x_val = x_val.reshape(x_val.shape[0], 28, 28, 1)
13 | x = x / 255.0
14 | y = tf.one_hot(y, depth=10, dtype=tf.float32)
15 | y_val = tf.one_hot(y_val, depth=10)
16 | print('x/y shape:', x.shape, y.shape)
17 | 
18 | 
19 | def train_gen():
20 |     global x, y
21 |     for x2, y2 in zip(x, y):
22 |         yield (x2, x2), y2
23 |         # yield x2, y2
24 | 
25 | 
26 | train_dataset = tf.data.Dataset.from_generator(
27 |     train_gen,
28 |     ((tf.float32, tf.float32), tf.float32),
29 |     # (tf.TensorShape([28, 28]), tf.TensorShape([10]))
30 |     ((tf.TensorShape([28, 28, 1]), tf.TensorShape([28, 28, 1])), tf.TensorShape([10]))
31 | )
32 | train_dataset = train_dataset.batch(100)
33 | # val_dataset = train_dataset.batch(10)
34 | 
35 | # train_dataset, val_dataset = mnist_dataset()
36 | 
37 | # resnet = tf.keras.applications.ResNet50(
38 | #     include_top=True,
39 | #     weights=None,
40 | #     input_tensor=None,
41 | #     input_shape=None,
42 | #     pooling=None,
43 | #     classes=10
44 | # )
45 | 
46 | # model = tf.keras.Sequential([
47 | #     resnet,
48 | #     layers.Dense(10, name='asd')
49 | # ])
50 | 
51 | input1 = layers.Input((28, 28, 1))
52 | input2 = layers.Input((28, 28, 1))
53 | 
54 | conv1 = layers.Convolution2D(64, (1, 1), activation='relu')(input1)
55 | conv2 = layers.Convolution2D(64, (1, 1), activation='relu')(conv1)
56 | rs1 = layers.Flatten()(conv2)
57 | rs2 = layers.Flatten()(input2)
58 | 
59 | d1 = layers.Dense(64, activation='relu')(rs1)
60 | d2 = layers.Dense(64, activation='relu')(rs2)
61 | c1 = layers.Concatenate()([d1, d2])
62 | d3 = layers.Dense(64, name='YoloDense', activation='relu')(c1)
63 | 
64 | output1 = layers.Dense(10)(d3)
65 | model = Model(inputs=[input1, input2], outputs=[output1])
66 | 
67 | model.summary()
68 | 
69 | experiment.watch_keras_model(model)
70 | deepkit_callback = experiment.create_keras_callback()
71 | 
72 | log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
73 | tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
74 | # no need to use compile if you have no loss/optimizer/metrics involved here.
75 | model.compile(optimizer=optimizers.Adam(0.001),
76 |               loss=tf.losses.CategoricalCrossentropy(from_logits=True),
77 |               metrics=['accuracy'])
78 | 
79 | model.fit(train_dataset.repeat(), epochs=30, steps_per_epoch=500,
80 |           validation_data=train_dataset.repeat(),
81 |           validation_steps=2,
82 |           callbacks=[tensorboard_callback, deepkit_callback]
83 |           )
84 | 


--------------------------------------------------------------------------------
/examples/torch/resnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | bla = 10
  6 | 
  7 | 
  8 | def my_activation(x):
  9 |     return x ** 2 - 1
 10 | 
 11 | 
 12 | class BasicBlock(nn.Module):
 13 |     expansion = 1
 14 | 
 15 |     def __init__(self, in_planes, planes, stride=1):
 16 |         super(BasicBlock, self).__init__()
 17 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 18 |         self.bn1 = nn.BatchNorm2d(planes)
 19 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
 20 |         self.bn2 = nn.BatchNorm2d(planes)
 21 | 
 22 |         self.shortcut = nn.Sequential()
 23 |         if stride != 1 or in_planes != self.expansion * planes:
 24 |             self.shortcut = nn.Sequential(
 25 |                 nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
 26 |                 nn.BatchNorm2d(self.expansion * planes)
 27 |             )
 28 | 
 29 |     def ups(self, x):
 30 |         i = 1 + 1.9 - 1 / bla
 31 |         return x * i
 32 | 
 33 |     def forward(self, x):
 34 |         out = self.bn2(self.conv2(F.relu(self.bn1(self.conv1(x)))))
 35 |         out += self.shortcut(x)
 36 |         out = F.relu(out)
 37 |         return out
 38 | 
 39 | 
 40 | class Bottleneck(nn.Module):
 41 |     expansion = 4
 42 | 
 43 |     def __init__(self, in_planes, planes, stride=1):
 44 |         super(Bottleneck, self).__init__()
 45 |         self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
 46 |         self.bn1 = nn.BatchNorm2d(planes)
 47 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
 48 |         self.bn2 = nn.BatchNorm2d(planes)
 49 |         self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False)
 50 |         self.bn3 = nn.BatchNorm2d(self.expansion * planes)
 51 | 
 52 |         self.shortcut = nn.Sequential()
 53 |         if stride != 1 or in_planes != self.expansion * planes:
 54 |             self.shortcut = nn.Sequential(
 55 |                 nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
 56 |                 nn.BatchNorm2d(self.expansion * planes)
 57 |             )
 58 | 
 59 |     def forward(self, x):
 60 |         out = F.relu(self.bn1(self.conv1(x)))
 61 |         out = F.relu(self.bn2(self.conv2(out)))
 62 |         out = self.bn3(self.conv3(out))
 63 |         out += self.shortcut(x)
 64 |         out = F.relu(out)
 65 |         return out
 66 | 
 67 | 
 68 | class ResNet(nn.Module):
 69 |     def __init__(self, block, num_blocks, num_classes=10):
 70 |         super(ResNet, self).__init__()
 71 |         self.in_planes = 64
 72 | 
 73 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
 74 |         self.bn1 = nn.BatchNorm2d(64)
 75 |         self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
 76 |         self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
 77 |         self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
 78 |         self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
 79 |         self.linear = nn.Linear(512 * block.expansion, num_classes)
 80 | 
 81 |     def _make_layer(self, block, planes, num_blocks, stride):
 82 |         strides = [stride] + [1] * (num_blocks - 1)
 83 |         layers = []
 84 |         for stride in strides:
 85 |             layers.append(block(self.in_planes, planes, stride))
 86 |             self.in_planes = planes * block.expansion
 87 |         return nn.Sequential(*layers)
 88 | 
 89 |     def forward(self, x):
 90 |         out = F.relu(self.bn1(self.conv1(x)))
 91 |         out = self.layer1(out)
 92 |         out = self.layer2(out)
 93 |         out = self.layer3(out)
 94 |         out = self.layer4(out)
 95 |         out = F.avg_pool2d(out, 4)
 96 |         out = out.view(out.size(0), -1)
 97 |         out = self.linear(out)
 98 |         return out
 99 | 
100 | 
101 | def ResNet18():
102 |     return ResNet(BasicBlock, [2, 2, 2, 2])
103 | 
104 | 
105 | def ResNet34():
106 |     return ResNet(BasicBlock, [3, 4, 6, 3])
107 | 
108 | 
109 | def ResNet50():
110 |     return ResNet(Bottleneck, [3, 4, 6, 3])
111 | 
112 | 
113 | def ResNet101():
114 |     return ResNet(Bottleneck, [3, 4, 23, 3])
115 | 
116 | 
117 | def ResNet152():
118 |     return ResNet(Bottleneck, [3, 8, 36, 3])
119 | 


--------------------------------------------------------------------------------
/examples/torch/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import torch
  4 | import torchvision
  5 | import torchvision.transforms as transforms
  6 | from torch import nn, optim
  7 | from torch.backends import cudnn
  8 | 
  9 | import deepkit
 10 | from examples.torch.resnet import ResNet18
 11 | 
 12 | experiment = deepkit.experiment()
 13 | 
 14 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
 15 | best_acc = 0  # best test accuracy
 16 | start_epoch = experiment.intconfig('start_epoch', 0)  # start from epoch 0 or last checkpoint epoch
 17 | 
 18 | batch_size = experiment.intconfig('batch_size', 32)
 19 | 
 20 | # Data
 21 | print('==> Preparing data..')
 22 | transform_train = transforms.Compose([
 23 |     transforms.RandomCrop(32, padding=4),
 24 |     transforms.RandomHorizontalFlip(),
 25 |     transforms.ToTensor(),
 26 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 27 | ])
 28 | 
 29 | transform_test = transforms.Compose([
 30 |     transforms.ToTensor(),
 31 |     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
 32 | ])
 33 | 
 34 | trainset = torchvision.datasets.CIFAR10(root='~/.data', train=True, download=True, transform=transform_train)
 35 | trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=0)
 36 | 
 37 | testset = torchvision.datasets.CIFAR10(root='~/.data', train=False, download=True, transform=transform_test)
 38 | testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=0)
 39 | 
 40 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
 41 | 
 42 | # Model
 43 | print('==> Building model..')
 44 | # net = VGG('VGG19')
 45 | net = ResNet18()
 46 | # net = ResNet152()
 47 | # net = PreActResNet18()
 48 | # net = GoogLeNet()
 49 | # net = DenseNet121()
 50 | # net = ResNeXt29_2x64d()
 51 | # net = MobileNet()
 52 | # net = MobileNetV2()
 53 | # net = DPN92()
 54 | # net = ShuffleNetG2()
 55 | # net = SENet18()
 56 | # net = ShuffleNetV2(1)
 57 | # net = EfficientNetB0()
 58 | if device == 'cuda':
 59 |     net = torch.nn.DataParallel(net)
 60 |     cudnn.benchmark = True
 61 | 
 62 | if experiment.boolconfig('resume'):
 63 |     # Load checkpoint.
 64 |     print('==> Resuming from checkpoint..')
 65 |     assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
 66 |     checkpoint = torch.load('./checkpoint/ckpt.pth')
 67 |     net.load_state_dict(checkpoint['net'])
 68 |     best_acc = checkpoint['acc']
 69 |     start_epoch = checkpoint['epoch']
 70 | 
 71 | criterion = nn.CrossEntropyLoss()
 72 | optimizer = optim.SGD(net.parameters(), lr=experiment.floatconfig('lr', 0.1), momentum=0.9, weight_decay=5e-4)
 73 | 
 74 | experiment.watch_torch_model(net)
 75 | 
 76 | 
 77 | # Training
 78 | def train(epoch):
 79 |     print('\nEpoch: %d' % epoch)
 80 |     net.train()
 81 |     train_loss = 0
 82 |     correct = 0
 83 |     total = 0
 84 |     total_batches = len(trainloader)
 85 |     for batch_idx, (inputs, targets) in enumerate(trainloader):
 86 |         inputs, targets = inputs.to(device), targets.to(device)
 87 |         optimizer.zero_grad()
 88 |         outputs = net(inputs)
 89 |         loss = criterion(outputs, targets)
 90 |         loss.backward()
 91 |         optimizer.step()
 92 | 
 93 |         train_loss += loss.item()
 94 |         _, predicted = outputs.max(1)
 95 |         total += targets.size(0)
 96 |         correct += predicted.eq(targets).sum().item()
 97 | 
 98 |         experiment.batch(batch_idx, total_batches, targets.size(0))
 99 |         experiment.log_metric('loss/train', epoch + (batch_idx / total_batches), (train_loss / (batch_idx + 1)))
100 |         experiment.log_metric('accuracy/train', epoch + (batch_idx / total_batches), correct / total)
101 |         # progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
102 |         #              % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
103 | 
104 | 
105 | def test(epoch):
106 |     global best_acc
107 |     net.eval()
108 |     test_loss = 0
109 |     correct = 0
110 |     total = 0
111 |     with torch.no_grad():
112 |         for batch_idx, (inputs, targets) in enumerate(testloader):
113 |             inputs, targets = inputs.to(device), targets.to(device)
114 |             outputs = net(inputs)
115 |             loss = criterion(outputs, targets)
116 | 
117 |             test_loss += loss.item()
118 |             _, predicted = outputs.max(1)
119 |             total += targets.size(0)
120 |             correct += predicted.eq(targets).sum().item()
121 | 
122 |             experiment.log_metric('loss/val', epoch, test_loss / (batch_idx + 1))
123 |             experiment.log_metric('accuracy/val', epoch, correct / total)
124 |             # progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
125 |             #              % (test_loss / (batch_idx + 1), 100. * correct / total, correct, total))
126 | 
127 |     # Save checkpoint.
128 |     acc = 100. * correct / total
129 |     if acc > best_acc:
130 |         print('Saving..')
131 |         state = {
132 |             'net': net.state_dict(),
133 |             'acc': acc,
134 |             'epoch': epoch,
135 |         }
136 |         if not os.path.isdir('checkpoint'):
137 |             os.mkdir('checkpoint')
138 |         torch.save(state, './checkpoint/ckpt.pth')
139 |         best_acc = acc
140 | 
141 | 
142 | for epoch in range(start_epoch, start_epoch + 200):
143 |     experiment.epoch(epoch, 200)
144 |     train(epoch)
145 |     test(epoch)
146 | 


--------------------------------------------------------------------------------
/deepkit/__init__.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | 
  4 | import deepkit.globals
  5 | import deepkit.utils
  6 | from deepkit.client import Client
  7 | from deepkit.experiment import Experiment, ExperimentOptions
  8 | import getpass
  9 | 
 10 | from deepkit.home import get_home_config
 11 | 
 12 | 
 13 | def log(s):
 14 |     if deepkit.globals.last_experiment:
 15 |         deepkit.globals.last_experiment.log(s)
 16 |     else:
 17 |         deepkit.globals.last_logs.write(s)
 18 | 
 19 | 
 20 | def experiment(project=None, account=None) -> Experiment:
 21 |     """
 22 |     Per default this method returns a singleton.
 23 | 
 24 |     If you start an experiment using the Deepkit cli (`deepkit run`) or the Deepkit app, the experiment
 25 |     is created beforehand and this method picks it up. If an experiment is run without cli or app,
 26 |     then this method creates a new one. In any case, this method returns always the same instance, so
 27 |     you don't strictly need to save or pass around its return value.
 28 | 
 29 |     If you want to create new sub experiments you should use:
 30 | 
 31 |         import deepkit
 32 |         root_experiment = deepkit.experiment()
 33 |         sub_experiment = root_experiment.create_sub_experiment()
 34 | 
 35 |     This will create _always_ a new child experiments. In this cases, make sure to call `experiment.done()`,
 36 |     (or abort, crashed, failed) manually to end the created experiment and pass around the created experiment
 37 |     instance manually (since its not tracked).
 38 | 
 39 |     :param project: If the current folder is not linked and you don't specify a project here, an error is raised since
 40 |                     Deepkit isn't able to know to which project the experiments data should be sent.
 41 |     :param account: Per default the first account linked to this folder is used (see `deepkit link` or `deepkit-sdk auth -l`),
 42 |                     this is on a new system `localhost`.
 43 |                     You can overwrite which account is used by specifying the name here (see `deepkit id` for
 44 |                     available accounts in your system).
 45 |     :return:
 46 |     """
 47 |     """
 48 |     :return: returns either a new experiment or the last created one.
 49 |     """
 50 |     if not deepkit.globals.last_experiment:
 51 |         deepkit.globals.last_experiment = Experiment(project=project, account=account, monitoring=True,
 52 |                                                      try_pick_up=True)
 53 | 
 54 |     return deepkit.globals.last_experiment
 55 | 
 56 | 
 57 | if deepkit.utils.in_self_execution():
 58 |     class StdHook:
 59 |         def __init__(self, s):
 60 |             self.s = s
 61 | 
 62 |         def fileno(self):
 63 |             return self.s.fileno()
 64 | 
 65 |         def isatty(self):
 66 |             return self.s.isatty()
 67 | 
 68 |         def flush(self):
 69 |             self.s.flush()
 70 | 
 71 |         def write(self, s):
 72 |             self.s.write(s)
 73 |             log(s)
 74 | 
 75 | 
 76 |     sys.stdout = StdHook(sys.__stdout__)
 77 |     sys.stderr = StdHook(sys.__stderr__)
 78 | 
 79 | 
 80 | def get_credentials():
 81 |     username = input("Username: ")
 82 |     try:
 83 |         password = getpass.getpass()
 84 |         return username, password
 85 |     except Exception as error:
 86 |         print('ERROR', error)
 87 | 
 88 | 
 89 | access_key_map = dict()
 90 | 
 91 | 
 92 | def access_key_map_cache_key(host, port, ssl):
 93 |     return host + '-' + str(port) + str(ssl)
 94 | 
 95 | 
 96 | def login(
 97 |         access_key=None,
 98 |         host='app.deepkit.ai',
 99 |         port=443,
100 |         ssl=True,
101 | ):
102 |     """
103 |     In environments (like Jupyter Notebooks/Google Colab) where its not possible to use the Deepkit CLI to authenticate
104 |     with a Deepkit server (deepkit auth) or where "deepkit run" is not used, it's required to provide an access-key
105 |     or login via username/password.
106 | 
107 |     It's important to call this method BEFORE deepkit.experiment() is called.
108 |     """
109 |     if host is 'localhost':
110 |         ssl = False
111 | 
112 |         if port == 443:
113 |             port = 8960
114 | 
115 |         try:
116 |             config = get_home_config()
117 |             account_config = config.get_account_for_name('localhost')
118 |             access_key = account_config.token
119 |         except Exception:
120 |             pass
121 | 
122 |     if access_key is None:
123 |         cache_key = access_key_map_cache_key(host, port, ssl)
124 |         if cache_key in access_key_map:
125 |             access_key = access_key_map[cache_key]
126 |         else:
127 |             print("No access_key provided. Please provide username and password.")
128 |             print(
129 |                 f"Note: You can create an access_key directly in the CLI using `deepkit access-key {host} --port {port}`")
130 |             client = Client()
131 |             client.host = host
132 |             client.port = port
133 |             client.ssl = ssl
134 | 
135 |             username, password = get_credentials()
136 | 
137 |             print(f"Connecting {client.host}:{client.port}")
138 |             client.connect_anon()
139 |             access_key = client.app_action_threadsafe('login', [username, password]).result()
140 |             if not access_key:
141 |                 raise Exception("Credentials check failed")
142 | 
143 |             print("Login successful. Access key is " + access_key)
144 |             access_key_map[cache_key] = access_key
145 | 
146 |     os.environ['DEEPKIT_HOST'] = host
147 |     os.environ['DEEPKIT_SSL'] = '1' if ssl else '0'
148 |     os.environ['DEEPKIT_PORT'] = str(port)
149 | 
150 |     if 'DEEPKIT_JOB_ACCESSTOKEN' in os.environ:
151 |         del os.environ['DEEPKIT_JOB_ACCESSTOKEN']
152 | 
153 |     if 'DEEPKIT_JOB_ID' in os.environ:
154 |         del os.environ['DEEPKIT_JOB_ID']
155 | 
156 |     os.environ['DEEPKIT_ACCESSTOKEN'] = access_key
157 | 


--------------------------------------------------------------------------------
/deepkit/debugger.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import time
  3 | from typing import NamedTuple, List, Dict
  4 | 
  5 | import PIL.Image
  6 | 
  7 | from deepkit.utils.image import pil_image_to_jpeg
  8 | 
  9 | 
 10 | class DebuggerFetchItem(NamedTuple):
 11 |     name: str
 12 |     output: any
 13 |     ahistogram: any
 14 |     whistogram: any
 15 |     bhistogram: any
 16 | 
 17 | 
 18 | class DebuggerFetchConfig(NamedTuple):
 19 |     x: int
 20 |     layers: List[str]
 21 |     all: bool
 22 | 
 23 |     def needs_fetch(self, name: str) -> bool:
 24 |         if self.all: return True
 25 |         return name in self.layers
 26 | 
 27 | 
 28 | class DebuggerManager:
 29 |     def __init__(self, experiment):
 30 |         import deepkit
 31 |         self.experiment: deepkit.Experiment = experiment
 32 | 
 33 |         self.live_last_sent = time.time()
 34 |         self.x = 0
 35 |         self.record_snapshot_created = False
 36 |         self.record_last_sent = time.time()
 37 |         self.record_last_epoch = 0
 38 |         self.debuggers = []
 39 |         self.active_debug_data_for_this_run = False
 40 |         self.record_needed = False
 41 |         self.live_needed = False
 42 |         self.send_data_futures = []
 43 | 
 44 |     def register_debugger(self, debugger):
 45 |         self.debuggers.append(debugger)
 46 | 
 47 |     def on_disconnect(self):
 48 |         for f in self.send_data_futures:
 49 |             f.set_result(False)
 50 | 
 51 |         self.send_data_futures = []
 52 | 
 53 |     def create_snapshot(self, x, layers):
 54 |         self.experiment.client.job_action_threadsafe('addSnapshot', [
 55 |             x,
 56 |             time.time(),
 57 |             layers,
 58 |             self.experiment.job_iteration,
 59 |             self.experiment.job_step,
 60 |         ])
 61 | 
 62 |     def tick(self):
 63 |         """
 64 |         Checks whether a new snapshot or live data needs to be fetched and sent. If so we trigger on each debugger
 65 |         instance a fetch() call and send that data to the server.
 66 |         """
 67 |         if self.active_debug_data_for_this_run: return
 68 |         if not self.experiment.client.is_connected(): return
 69 | 
 70 |         state = self.experiment.debugger_controller.state
 71 |         if not state: return
 72 | 
 73 |         self.record_needed = state.recording
 74 |         fetch_all = False
 75 | 
 76 |         if state.recordingMode == 'second':
 77 |             diff = time.time() - self.record_last_sent
 78 |             if diff <= state.recordingSecond:
 79 |                 # not enough time past, wait for next call
 80 |                 self.record_needed = False
 81 | 
 82 |         if state.recordingMode == 'epoch':
 83 |             # if not epoch_end: record_needed = False
 84 |             if self.experiment.job_iteration == self.record_last_epoch:
 85 |                 # nothing to do for records
 86 |                 self.record_needed = False
 87 |             self.record_last_epoch = self.experiment.job_iteration
 88 | 
 89 |         self.live_needed = state.live and (time.time() - self.live_last_sent) > 1
 90 |         layers = list(state.watchingLayers.keys())
 91 | 
 92 |         if not self.live_needed and not self.record_needed:
 93 |             return
 94 | 
 95 |         self.active_debug_data_for_this_run = True
 96 |         self.record_snapshot_created = False
 97 | 
 98 |         if self.record_needed and state.recordingLayers == 'all':
 99 |             fetch_all = True
100 | 
101 |         # wait for all previous to be sent first.
102 |         try:
103 |             for f in self.send_data_futures: f.result()
104 |         except Exception as e:
105 |             print('Failing sending debug data', e)
106 |             pass
107 | 
108 |         self.x += 1
109 | 
110 |         fetch_config = DebuggerFetchConfig(x=self.x, layers=layers, all=fetch_all)
111 | 
112 |         fetch_layers: Dict[str, DebuggerFetchItem] = dict()
113 |         for debugger in self.debuggers:
114 |             fetch_layers.update(debugger.fetch(fetch_config))
115 | 
116 |         if self.record_needed and len(fetch_layers):
117 |             self.create_snapshot(self.x, list(fetch_layers.keys()))
118 | 
119 |         for fetch in fetch_layers.values():
120 |             output = fetch.output
121 |             output_image = None
122 |             if isinstance(fetch.output, PIL.Image.Image):
123 |                 output = None
124 |                 output_image = base64.b64encode(pil_image_to_jpeg(fetch.output)).decode()
125 | 
126 |             if self.record_needed:
127 |                 self.send_data_futures.append(self.experiment.client.job_action_threadsafe('setSnapshotLayerData', [
128 |                     fetch_config.x,
129 |                     self.live_needed,
130 |                     fetch.name,
131 |                     output,
132 |                     output_image,
133 |                     base64.b64encode(fetch.ahistogram).decode() if fetch.ahistogram else None,
134 |                     base64.b64encode(fetch.whistogram).decode() if fetch.whistogram else None,
135 |                     base64.b64encode(fetch.bhistogram).decode() if fetch.bhistogram else None,
136 |                 ]))
137 |             else:
138 |                 self.send_data_futures.append(self.experiment.client.job_action_threadsafe('addLiveLayerData', [
139 |                     fetch.name,
140 |                     output,
141 |                     output_image,
142 |                     base64.b64encode(fetch.ahistogram).decode() if fetch.ahistogram else None,
143 |                     base64.b64encode(fetch.whistogram).decode() if fetch.whistogram else None,
144 |                     base64.b64encode(fetch.bhistogram).decode() if fetch.bhistogram else None,
145 |                 ]))
146 | 
147 |         self.live_last_sent = time.time()
148 | 
149 |         self.active_debug_data_for_this_run = False
150 | 
151 |         if self.record_needed:
152 |             self.record_last_sent = time.time()
153 | 
154 |         if self.live_needed:
155 |             self.live_last_sent = time.time()
156 | 
157 |         self.record_needed = False
158 |         self.live_needed = False
159 |         self.record_snapshot_created = False
160 | 


--------------------------------------------------------------------------------
/examples/keras-cifar10/model.py:
--------------------------------------------------------------------------------
  1 | '''Train a simple deep CNN on the CIFAR10 small images dataset.
  2 | It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
  3 | (it's still underfitting at that point, though).
  4 | '''
  5 | import os
  6 | 
  7 | # os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
  8 | # os.environ["RUNFILES_DIR"] = "/usr/local/share/plaidml"
  9 | # os.environ["PLAIDML_NATIVE_PATH"] = "/usr/local/lib/libplaidml.dylib"
 10 | 
 11 | import keras
 12 | from keras.datasets import cifar10
 13 | from keras.preprocessing.image import ImageDataGenerator
 14 | from keras.models import Sequential
 15 | from keras.layers import Dense, Dropout, Flatten
 16 | from keras.layers import Conv2D, MaxPooling2D, Activation
 17 | 
 18 | import deepkit
 19 | 
 20 | experiment = deepkit.experiment()
 21 | experiment.add_file(__file__)
 22 | 
 23 | batch_size = experiment.intconfig('batch_size', 16)
 24 | num_classes = 10
 25 | epochs = experiment.intconfig('epochs', 15)
 26 | data_augmentation = experiment.boolconfig('data_augmentation', False)
 27 | num_predictions = 20
 28 | 
 29 | save_dir = os.path.join(os.getcwd(), 'saved_models')
 30 | model_name = 'keras_cifar10_trained_model.h5'
 31 | 
 32 | # The data, split between train and test sets:
 33 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
 34 | 
 35 | labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
 36 | 
 37 | x_train = x_train[0:experiment.intconfig('train_samples', 10000)]
 38 | y_train = y_train[0:experiment.intconfig('train_samples', 10000)]
 39 | 
 40 | x_test = x_test[0:experiment.intconfig('test_samples', 10000)]
 41 | y_test = y_test[0:experiment.intconfig('test_samples', 10000)]
 42 | 
 43 | experiment.log_insight(*x_train[0:50], name='samples/train/sample')
 44 | 
 45 | for i, x in enumerate(x_test[0:20]):
 46 |     experiment.log_insight(x, name='samples/test/sample_' + str(i), meta=labels[y_test[i][0]])
 47 | 
 48 | experiment.log_insight({'my-data': 123, 'more': True}, name='json-like/sample1')
 49 | experiment.log_insight({'my-data': 234, 'more': False}, name='json-like/sample2')
 50 | experiment.log_insight(12312312.333, name='json-like/sample3')
 51 | experiment.log_insight("This is just text\nYay.", name='json-like/sample4')
 52 | experiment.log_insight(
 53 |     "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's "
 54 |     "standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make "
 55 |     "a type specimen book. It has survived not only five centuries.",
 56 |     name='json-like/sample5')
 57 | experiment.log_insight(x_test[0], name='numpy-shizzle/sample1', image_convertion=False)
 58 | experiment.log_insight(x_test[1], name='numpy-shizzle/sample2', image_convertion=False)
 59 | experiment.log_insight(x_test[2], name='numpy-shizzle/sample3', image_convertion=False)
 60 | experiment.log_insight(x_test[3], name='numpy-shizzle/sample4', image_convertion=False)
 61 | experiment.log_insight(y_test[0:50], name='numpy-shizzle/y_test', image_convertion=False)
 62 | 
 63 | print('x_train shape:', x_train.shape)
 64 | print(x_train.shape[0], 'train samples')
 65 | print(x_test.shape[0], 'test samples')
 66 | print(x_test.shape[0], 'test samples')
 67 | 
 68 | # Convert class vectors to binary class matrices.
 69 | y_train = keras.utils.to_categorical(y_train, num_classes)
 70 | y_test = keras.utils.to_categorical(y_test, num_classes)
 71 | 
 72 | model = Sequential()
 73 | model.add(Conv2D(12, kernel_size=(3, 3), input_shape=x_train.shape[1:]))
 74 | model.add(Activation('relu'))
 75 | model.add(Conv2D(64, (3, 3)))
 76 | model.add(Activation('relu'))
 77 | model.add(MaxPooling2D(pool_size=(2, 2)))
 78 | model.add(Dropout(0.25))
 79 | 
 80 | model.add(Conv2D(64, (3, 3), padding='same'))
 81 | model.add(Activation('relu'))
 82 | model.add(Conv2D(64, (3, 3)))
 83 | model.add(Activation('relu'))
 84 | model.add(MaxPooling2D(pool_size=(2, 2)))
 85 | model.add(Dropout(0.25))
 86 | 
 87 | model.add(Flatten())
 88 | model.add(Dense(512))
 89 | model.add(Activation('relu'))
 90 | model.add(Dropout(0.5))
 91 | model.add(Dense(num_classes))
 92 | model.add(Activation('softmax'))
 93 | 
 94 | opt = keras.optimizers.Adadelta(lr=experiment.floatconfig('lr', 0.1))
 95 | 
 96 | deepkit_callback = experiment.create_keras_callback(model)
 97 | 
 98 | callbacks = [deepkit_callback]
 99 | 
100 | # Let's train the model using RMSprop
101 | model.compile(loss='categorical_crossentropy',
102 |               optimizer=opt,
103 |               metrics=['accuracy'])
104 | 
105 | model.summary()
106 | 
107 | x_train = x_train.astype('float32')
108 | x_test = x_test.astype('float32')
109 | x_train /= 255
110 | x_test /= 255
111 | 
112 | if not data_augmentation:
113 |     print('Not using data augmentation.')
114 |     model.fit(x_train, y_train,
115 |               batch_size=batch_size,
116 |               epochs=epochs,
117 |               callbacks=callbacks,
118 |               validation_data=(x_test, y_test),
119 |               shuffle=True)
120 | else:
121 |     print('Using real-time data augmentation.')
122 |     # This will do preprocessing and realtime data augmentation:
123 |     datagen = ImageDataGenerator(
124 |         featurewise_center=False,  # set input mean to 0 over the dataset
125 |         samplewise_center=False,  # set each sample mean to 0
126 |         featurewise_std_normalization=False,  # divide inputs by std of the dataset
127 |         samplewise_std_normalization=False,  # divide each input by its std
128 |         zca_whitening=False,  # apply ZCA whitening
129 |         zca_epsilon=1e-06,  # epsilon for ZCA whitening
130 |         rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
131 |         # randomly shift images horizontally (fraction of total width)
132 |         width_shift_range=0.1,
133 |         # randomly shift images vertically (fraction of total height)
134 |         height_shift_range=0.1,
135 |         shear_range=0.,  # set range for random shear
136 |         zoom_range=0.,  # set range for random zoom
137 |         channel_shift_range=0.,  # set range for random channel shifts
138 |         # set mode for filling points outside the input boundaries
139 |         fill_mode='nearest',
140 |         cval=0.,  # value used for fill_mode = "constant"
141 |         horizontal_flip=True,  # randomly flip images
142 |         vertical_flip=False,  # randomly flip images
143 |         # set rescaling factor (applied before any other transformation)
144 |         rescale=None,
145 |         # set function that will be applied on each input
146 |         preprocessing_function=None,
147 |         # image data format, either "channels_first" or "channels_last"
148 |         data_format=None,
149 |         # fraction of images reserved for validation (strictly between 0 and 1)
150 |         validation_split=0.0)
151 | 
152 |     # Compute quantities required for feature-wise normalization
153 |     # (std, mean, and principal components if ZCA whitening is applied).
154 |     datagen.fit(x_train)
155 | 
156 |     # Fit the model on the batches generated by datagen.flow().
157 |     model.fit_generator(datagen.flow(x_train, y_train,
158 |                                      batch_size=batch_size),
159 |                         epochs=epochs,
160 |                         steps_per_epoch=len(x_train)/batch_size,
161 |                         verbose=0,
162 |                         callbacks=callbacks,
163 |                         validation_data=(x_test, y_test),
164 |                         workers=4)
165 | 
166 | # Save model and weights
167 | if not os.path.isdir(save_dir):
168 |     os.makedirs(save_dir)
169 | model_path = os.path.join(save_dir, model_name)
170 | model.save(model_path)
171 | print('Saved trained model at %s ' % model_path)
172 | 
173 | # Score trained model.
174 | scores = model.evaluate(x_test, y_test, verbose=1)
175 | print('Test loss:', scores[0])
176 | print('Test accuracy:', scores[1])
177 | 


--------------------------------------------------------------------------------
/deepkit/deepkit_keras.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | 
  4 | import math
  5 | import os
  6 | import sys
  7 | import time
  8 | import numpy as np
  9 | 
 10 | if 'keras' in sys.modules:
 11 |     import keras
 12 | else:
 13 |     import tensorflow.keras as keras
 14 | 
 15 | import deepkit
 16 | 
 17 | 
 18 | def is_generator(obj):
 19 |     import inspect
 20 | 
 21 |     return obj is not None and (
 22 |             inspect.isgeneratorfunction(obj)
 23 |             or inspect.isgenerator(obj) or hasattr(obj, 'next') or hasattr(obj, '__next__'))
 24 | 
 25 | 
 26 | def ensure_dir(d):
 27 |     if not os.path.isdir(d):
 28 |         if os.path.isfile(d):  # but a file, so delete it
 29 |             print("Deleted", d, "because it was a file, but needs to be an directory.")
 30 |             os.remove(d)
 31 | 
 32 |         os.makedirs(d)
 33 | 
 34 | 
 35 | def get_total_params(model):
 36 |     total_params = 0
 37 | 
 38 |     flattened_layers = model.flattened_layers if hasattr(model, 'flattened_layers') else model.layers
 39 | 
 40 |     for i in range(len(flattened_layers)):
 41 |         total_params += flattened_layers[i].count_params()
 42 | 
 43 |     return total_params
 44 | 
 45 | 
 46 | class KerasCallback(keras.callbacks.Callback):
 47 |     def __init__(self, debug_model_input=None):
 48 |         super(KerasCallback, self).__init__()
 49 | 
 50 |         self.experiment = deepkit.experiment()
 51 | 
 52 |         self.debug_model_input = debug_model_input
 53 | 
 54 |         self.data_validation = None
 55 |         self.data_validation_size = None
 56 | 
 57 |         self.current = {}
 58 |         self.last_batch_time = time.time()
 59 |         self.start_time = time.time()
 60 |         self.accuracy_metric = None
 61 |         self.all_losses = None
 62 |         self.loss_metric = None
 63 |         self.learning_rate_metric = None
 64 |         self.learning_rate_start = 0
 65 | 
 66 |     def set_model(self, model):
 67 |         super().set_model(model)
 68 |         self.experiment.watch_keras_model(model, self.debug_model_input)
 69 | 
 70 |     def on_train_begin(self, logs={}):
 71 |         self.start_time = time.time()
 72 |         self.last_batch_time = time.time()
 73 | 
 74 |         self.experiment.set_info('parameters', get_total_params(self.model))
 75 |         self.experiment.set_info('keras.image_data_format', keras.backend.image_data_format())
 76 |         self.experiment.set_info('keras.backend', keras.backend.backend())
 77 | 
 78 |         # self.job_backend.upload_keras_graph(self.model)
 79 | 
 80 |         if self.model.optimizer and hasattr(self.model.optimizer, 'get_config'):
 81 |             config = self.model.optimizer.get_config()
 82 |             self.experiment.set_info('optimizer', str(type(self.model.optimizer).__name__))
 83 |             for i, v in config.items():
 84 |                 self.experiment.set_info('optimizer.' + str(i), v)
 85 | 
 86 |         # compatibility with keras 1.x
 87 |         if 'epochs' not in self.params and 'nb_epoch' in self.params:
 88 |             self.params['epochs'] = self.params['nb_epoch']
 89 |         if 'samples' not in self.params and 'nb_sample' in self.params:
 90 |             self.params['samples'] = self.params['nb_sample']
 91 | 
 92 |         traces = ['training', 'validation']
 93 |         if hasattr(self.model, 'output_layers') and len(self.model.output_layers) > 1:
 94 |             traces = []
 95 |             for output in self.model.output_layers:
 96 |                 traces.append('train_' + output.name)
 97 |                 traces.append('val_' + output.name)
 98 | 
 99 |         self.accuracy_metric = self.experiment.define_metric('accuracy', traces=traces)
100 |         self.loss_metric = self.experiment.define_metric('loss', traces=['train', 'val'])
101 |         self.learning_rate_metric = self.experiment.define_metric('learning rate', traces=['start', 'end'])
102 | 
103 |         self.experiment.epoch(1, self.params['epochs'])
104 |         if hasattr(self.model, 'output_layers') and len(self.model.output_layers) > 1:
105 |             loss_traces = []
106 |             for output in self.model.output_layers:
107 |                 loss_traces.append('train_' + output.name)
108 |                 loss_traces.append('val_' + output.name)
109 | 
110 |             self.all_losses = self.experiment.define_metric('loss_all', traces=loss_traces)
111 | 
112 |         # if self.force_insights or self.job_model.insights_enabled:
113 |         #     images = self.build_insight_images()
114 |         #     self.job_backend.job_add_insight(0, images, None)
115 | 
116 |     def on_batch_begin(self, batch, logs={}):
117 |         if 'nb_batches' not in self.current:
118 |             batch_size = logs.get('size', 1)
119 |             if 'samples' in self.params and batch_size > 0:
120 |                 nb_batches = math.ceil(self.params['samples'] / batch_size)  # normal nb batches
121 |             elif 'steps' in self.params:
122 |                 nb_batches = self.params['steps']
123 |             else:
124 |                 nb_batches = 1
125 | 
126 |             self.current['nb_batches'] = nb_batches
127 | 
128 |     def on_batch_end(self, batch, logs={}):
129 |         self.filter_invalid_json_values(logs)
130 |         self.experiment.batch(batch + 1, self.current['nb_batches'], logs.get('size', 1))
131 | 
132 |     def on_epoch_begin(self, epoch, logs={}):
133 |         self.experiment.epoch(epoch + 1, self.params['epochs'])
134 |         self.learning_rate_start = self.get_learning_rate()
135 | 
136 |     def on_epoch_end(self, epoch, logs={}):
137 |         log = logs.copy()
138 | 
139 |         self.filter_invalid_json_values(log)
140 | 
141 |         log['created'] = time.time()
142 |         log['epoch'] = epoch + 1
143 | 
144 |         self.send_metrics(logs, log['epoch'])
145 |         self.send_optimizer_info(log['epoch'])
146 | 
147 |     def send_metrics(self, log, x):
148 |         if 'acc' in log:
149 |             # tf 1
150 |             accuracy_log_name = 'acc'
151 |             val_accuracy_log_name = 'val_acc'
152 |         else:
153 |             # tf2
154 |             accuracy_log_name = 'accuracy'
155 |             val_accuracy_log_name = 'val_accuracy'
156 | 
157 |         total_accuracy_validation = log.get(val_accuracy_log_name, None)
158 |         total_accuracy_training = log.get(accuracy_log_name, None)
159 | 
160 |         if total_accuracy_validation: total_accuracy_validation = float(total_accuracy_validation)
161 |         if total_accuracy_training: total_accuracy_training = float(total_accuracy_training)
162 | 
163 |         loss = log.get('loss', None)
164 |         val_loss = log.get('val_loss', None)
165 |         if loss is not None or val_loss is not None:
166 |             if loss: loss = float(loss)
167 |             if val_loss: val_loss = float(val_loss)
168 |             print('loss, val_loss', loss, val_loss)
169 |             self.loss_metric.send(loss, val_loss, x=x)
170 | 
171 |         accuracy = [total_accuracy_training, total_accuracy_validation]
172 |         if hasattr(self.model, 'output_layers') and len(self.model.output_layers) > 1:
173 |             accuracy = []
174 |             losses = []
175 |             for layer in self.model.output_layers:
176 |                 accuracy.append(log.get(layer.name + '_acc', None))
177 |                 accuracy.append(log.get('val_' + layer.name + '_acc', None))
178 | 
179 |                 losses.append(log.get(layer.name + '_loss', None))
180 |                 losses.append(log.get('val_' + layer.name + '_loss', None))
181 | 
182 |             self.all_losses.send(*losses, x=x)
183 | 
184 |         self.accuracy_metric.send(*accuracy, x=x)
185 | 
186 |     def send_optimizer_info(self, epoch):
187 |         self.learning_rate_metric.send(self.learning_rate_start, self.get_learning_rate(), x=epoch)
188 | 
189 |     def get_learning_rate(self):
190 |         if hasattr(self.model, 'optimizer'):
191 |             config = self.model.optimizer.get_config()
192 | 
193 |             if 'lr' in config and 'decay' in config and hasattr(self.model.optimizer, 'iterations'):
194 |                 iterations = self.model.optimizer.iterations
195 |                 # if hasattr(iterations, 'var') and hasattr(iterations.var, 'as_ndarray'):
196 |                 #     # plaidML
197 |                 #     ndarray = iterations.var.as_ndarray(None)
198 |                 #     iterations = float(ndarray)
199 |                 # else:
200 |                 iterations = float(keras.backend.get_value(iterations))
201 | 
202 |                 return config['lr'] * (1. / (1. + config['decay'] * iterations))
203 | 
204 |             elif 'lr' in config:
205 |                 return config['lr']
206 | 
207 |     def has_multiple_inputs(self):
208 |         return len(self.model.inputs) > 1
209 | 
210 |     def filter_invalid_json_values(self, dict: dict):
211 |         for k, v in dict.items():
212 |             if isinstance(v, (np.ndarray, np.generic)):
213 |                 dict[k] = v.tolist()
214 |             if math.isnan(v) or math.isinf(v):
215 |                 dict[k] = -1
216 | 


--------------------------------------------------------------------------------
/deepkit/utils/pilutil.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2001, 2002 Enthought, Inc.
  3 | All rights reserved.
  4 | Copyright (c) 2003-2017 SciPy Developers.
  5 | All rights reserved.
  6 | A collection of image utilities using the Python Imaging Library (PIL).
  7 | Note that PIL is not a dependency of SciPy and this module is not
  8 | available on systems that don't have PIL installed.
  9 | 
 10 | This source is coming from the scipy package and is here copied since building the
 11 | scipy package on various computers takes a long time and is not necessary to have
 12 | such a big package as requirement when you only need 2-3 methods. Also scipy has a lot of different
 13 | licenes in their package, so we used here only code with the MIT license.
 14 | """
 15 | 
 16 | 
 17 | import numpy
 18 | from PIL import Image
 19 | from numpy import (amin, amax, ravel, asarray, arange, ones, newaxis,
 20 |                    transpose, iscomplexobj, uint8, issubdtype, array)
 21 | 
 22 | 
 23 | def fromimage(im, flatten=False, mode=None):
 24 |     """
 25 |     Return a copy of a PIL image as a numpy array.
 26 |     Parameters
 27 |     ----------
 28 |     im : PIL image
 29 |         Input image.
 30 |     flatten : bool
 31 |         If true, convert the output to grey-scale.
 32 |     mode : str, optional
 33 |         Mode to convert image to, e.g. ``'RGB'``.  See the Notes of the
 34 |         `imread` docstring for more details.
 35 |     Returns
 36 |     -------
 37 |     fromimage : ndarray
 38 |         The different colour bands/channels are stored in the
 39 |         third dimension, such that a grey-image is MxN, an
 40 |         RGB-image MxNx3 and an RGBA-image MxNx4.
 41 |     """
 42 |     if not Image.isImageType(im):
 43 |         raise TypeError("Input is not a PIL image.")
 44 | 
 45 |     if mode is not None:
 46 |         if mode != im.mode:
 47 |             im = im.convert(mode)
 48 |     elif im.mode == 'P':
 49 |         # Mode 'P' means there is an indexed "palette".  If we leave the mode
 50 |         # as 'P', then when we do `a = array(im)` below, `a` will be a 2-D
 51 |         # containing the indices into the palette, and not a 3-D array
 52 |         # containing the RGB or RGBA values.
 53 |         if 'transparency' in im.info:
 54 |             im = im.convert('RGBA')
 55 |         else:
 56 |             im = im.convert('RGB')
 57 | 
 58 |     if flatten:
 59 |         im = im.convert('F')
 60 |     elif im.mode == '1':
 61 |         # Workaround for crash in PIL. When im is 1-bit, the call array(im)
 62 |         # can cause a seg. fault, or generate garbage. See
 63 |         # https://github.com/scipy/scipy/issues/2138 and
 64 |         # https://github.com/python-pillow/Pillow/issues/350.
 65 |         #
 66 |         # This converts im from a 1-bit image to an 8-bit image.
 67 |         im = im.convert('L')
 68 | 
 69 |     a = array(im)
 70 |     return a
 71 | 
 72 | _errstr = "Mode is unknown or incompatible with input array shape."
 73 | 
 74 | 
 75 | # Returns a byte-scaled image
 76 | def bytescale(data, cmin=None, cmax=None, high=255, low=0):
 77 |     """
 78 |     Byte scales an array (image).
 79 |     Byte scaling means converting the input image to uint8 dtype and scaling
 80 |     the range to ``(low, high)`` (default 0-255).
 81 |     If the input image already has dtype uint8, no scaling is done.
 82 |     Parameters
 83 |     ----------
 84 |     data : ndarray
 85 |         PIL image data array.
 86 |     cmin : scalar, optional
 87 |         Bias scaling of small values. Default is ``data.min()``.
 88 |     cmax : scalar, optional
 89 |         Bias scaling of large values. Default is ``data.max()``.
 90 |     high : scalar, optional
 91 |         Scale max value to `high`.  Default is 255.
 92 |     low : scalar, optional
 93 |         Scale min value to `low`.  Default is 0.
 94 |     Returns
 95 |     -------
 96 |     img_array : uint8 ndarray
 97 |         The byte-scaled array.
 98 |     Examples
 99 |     --------
100 |     >>> from scipy.misc import bytescale
101 |     >>> img = np.array([[ 91.06794177,   3.39058326,  84.4221549 ],
102 |     ...                 [ 73.88003259,  80.91433048,   4.88878881],
103 |     ...                 [ 51.53875334,  34.45808177,  27.5873488 ]])
104 |     >>> bytescale(img)
105 |     array([[255,   0, 236],
106 |            [205, 225,   4],
107 |            [140,  90,  70]], dtype=uint8)
108 |     >>> bytescale(img, high=200, low=100)
109 |     array([[200, 100, 192],
110 |            [180, 188, 102],
111 |            [155, 135, 128]], dtype=uint8)
112 |     >>> bytescale(img, cmin=0, cmax=255)
113 |     array([[91,  3, 84],
114 |            [74, 81,  5],
115 |            [52, 34, 28]], dtype=uint8)
116 |     """
117 |     if data.dtype == uint8:
118 |         return data
119 | 
120 |     if high > 255:
121 |         raise ValueError("`high` should be less than or equal to 255.")
122 |     if low < 0:
123 |         raise ValueError("`low` should be greater than or equal to 0.")
124 |     if high < low:
125 |         raise ValueError("`high` should be greater than or equal to `low`.")
126 | 
127 |     if cmin is None:
128 |         cmin = data.min()
129 |     if cmax is None:
130 |         cmax = data.max()
131 | 
132 |     cscale = cmax - cmin
133 |     if cscale < 0:
134 |         raise ValueError("`cmax` should be larger than `cmin`.")
135 |     elif cscale == 0:
136 |         cscale = 1
137 | 
138 |     scale = float(high - low) / cscale
139 |     bytedata = (data - cmin) * scale + low
140 |     return (bytedata.clip(low, high) + 0.5).astype(uint8)
141 | 
142 | 
143 | def toimage(arr, high=255, low=0, cmin=None, cmax=None, pal=None,
144 |             mode=None, channel_axis=None):
145 |     """Takes a numpy array and returns a PIL image.
146 |     The mode of the PIL image depends on the array shape and the `pal` and
147 |     `mode` keywords.
148 |     For 2-D arrays, if `pal` is a valid (N,3) byte-array giving the RGB values
149 |     (from 0 to 255) then ``mode='P'``, otherwise ``mode='L'``, unless mode
150 |     is given as 'F' or 'I' in which case a float and/or integer array is made.
151 |     Notes
152 |     -----
153 |     For 3-D arrays, the `channel_axis` argument tells which dimension of the
154 |     array holds the channel data.
155 |     For 3-D arrays if one of the dimensions is 3, the mode is 'RGB'
156 |     by default or 'YCbCr' if selected.
157 |     The numpy array must be either 2 dimensional or 3 dimensional.
158 |     """
159 |     data = asarray(arr)
160 |     if iscomplexobj(data):
161 |         raise ValueError("Cannot convert a complex-valued array.")
162 |     shape = list(data.shape)
163 |     valid = len(shape) == 2 or ((len(shape) == 3) and
164 |                                 ((3 in shape) or (4 in shape)))
165 |     if not valid:
166 |         raise ValueError("'arr' does not have a suitable array shape for "
167 |                          "any mode.")
168 |     if len(shape) == 2:
169 |         shape = (shape[1], shape[0])  # columns show up first
170 |         if mode == 'F':
171 |             data32 = data.astype(numpy.float32)
172 |             image = Image.frombytes(mode, shape, data32.tostring())
173 |             return image
174 |         if mode in [None, 'L', 'P']:
175 |             bytedata = bytescale(data, high=high, low=low,
176 |                                  cmin=cmin, cmax=cmax)
177 |             image = Image.frombytes('L', shape, bytedata.tostring())
178 |             if pal is not None:
179 |                 image.putpalette(asarray(pal, dtype=uint8).tostring())
180 |                 # Becomes a mode='P' automagically.
181 |             elif mode == 'P':  # default gray-scale
182 |                 pal = (arange(0, 256, 1, dtype=uint8)[:, newaxis] *
183 |                        ones((3,), dtype=uint8)[newaxis, :])
184 |                 image.putpalette(asarray(pal, dtype=uint8).tostring())
185 |             return image
186 |         if mode == '1':  # high input gives threshold for 1
187 |             bytedata = (data > high)
188 |             image = Image.frombytes('1', shape, bytedata.tostring())
189 |             return image
190 |         if cmin is None:
191 |             cmin = amin(ravel(data))
192 |         if cmax is None:
193 |             cmax = amax(ravel(data))
194 |         data = (data*1.0 - cmin)*(high - low)/(cmax - cmin) + low
195 |         if mode == 'I':
196 |             data32 = data.astype(numpy.uint32)
197 |             image = Image.frombytes(mode, shape, data32.tostring())
198 |         else:
199 |             raise ValueError(_errstr)
200 |         return image
201 | 
202 |     # if here then 3-d array with a 3 or a 4 in the shape length.
203 |     # Check for 3 in datacube shape --- 'RGB' or 'YCbCr'
204 |     if channel_axis is None:
205 |         if (3 in shape):
206 |             ca = numpy.flatnonzero(asarray(shape) == 3)[0]
207 |         else:
208 |             ca = numpy.flatnonzero(asarray(shape) == 4)
209 |             if len(ca):
210 |                 ca = ca[0]
211 |             else:
212 |                 raise ValueError("Could not find channel dimension.")
213 |     else:
214 |         ca = channel_axis
215 | 
216 |     numch = shape[ca]
217 |     if numch not in [3, 4]:
218 |         raise ValueError("Channel axis dimension is not valid.")
219 | 
220 |     bytedata = bytescale(data, high=high, low=low, cmin=cmin, cmax=cmax)
221 |     if ca == 2:
222 |         strdata = bytedata.tostring()
223 |         shape = (shape[1], shape[0])
224 |     elif ca == 1:
225 |         strdata = transpose(bytedata, (0, 2, 1)).tostring()
226 |         shape = (shape[2], shape[0])
227 |     elif ca == 0:
228 |         strdata = transpose(bytedata, (1, 2, 0)).tostring()
229 |         shape = (shape[2], shape[1])
230 |     if mode is None:
231 |         if numch == 3:
232 |             mode = 'RGB'
233 |         else:
234 |             mode = 'RGBA'
235 | 
236 |     if mode not in ['RGB', 'RGBA', 'YCbCr', 'CMYK']:
237 |         raise ValueError(_errstr)
238 | 
239 |     if mode in ['RGB', 'YCbCr']:
240 |         if numch != 3:
241 |             raise ValueError("Invalid array shape for mode.")
242 |     if mode in ['RGBA', 'CMYK']:
243 |         if numch != 4:
244 |             raise ValueError("Invalid array shape for mode.")
245 | 
246 |     # Here we know data and mode is correct
247 |     image = Image.frombytes(mode, shape, strdata)
248 |     return image
249 | 
250 | 
251 | def imresize(arr, size, interp='bilinear', mode=None):
252 |     """
253 |     Resize an image.
254 |     Parameters
255 |     ----------
256 |     arr : ndarray
257 |         The array of image to be resized.
258 |     size : int, float or tuple
259 |         * int   - Percentage of current size.
260 |         * float - Fraction of current size.
261 |         * tuple - Size of the output image.
262 |     interp : str, optional
263 |         Interpolation to use for re-sizing ('nearest', 'lanczos', 'bilinear', 'bicubic'
264 |         or 'cubic').
265 |     mode : str, optional
266 |         The PIL image mode ('P', 'L', etc.) to convert `arr` before resizing.
267 |     Returns
268 |     -------
269 |     imresize : ndarray
270 |         The resized array of image.
271 |     See Also
272 |     --------
273 |     toimage : Implicitly used to convert `arr` according to `mode`.
274 |     scipy.ndimage.zoom : More generic implementation that does not use PIL.
275 |     """
276 |     im = toimage(arr, mode=mode)
277 |     ts = type(size)
278 |     if issubdtype(ts, int):
279 |         percent = size / 100.0
280 |         size = tuple((array(im.size)*percent).astype(int))
281 |     elif issubdtype(type(size), float):
282 |         size = tuple((array(im.size)*size).astype(int))
283 |     else:
284 |         size = (size[1], size[0])
285 |     func = {'nearest': 0, 'lanczos': 1, 'bilinear': 2, 'bicubic': 3, 'cubic': 3}
286 |     imnew = im.resize(size, resample=func[interp])
287 |     return fromimage(imnew)


--------------------------------------------------------------------------------
/deepkit/pytorch_graph.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | import torch
  3 | 
  4 | methods_OP = ['attributeNames', 'hasMultipleOutputs', 'hasUses', 'inputs',
  5 |               'kind', 'outputs', 'outputsSize', 'scopeName']
  6 | # Some additional methods to explure for methods_IO are
  7 | #
  8 | #   'unique' (type int)
  9 | #   'type' (type <Tensor<class 'torch._C.Type'>>)
 10 | #
 11 | # But the below are sufficient for now.
 12 | methods_IO = ['node', 'offset', 'debugName']
 13 | 
 14 | GETATTR_KIND = 'prim::GetAttr'
 15 | CLASSTYPE_KIND = 'ClassType'
 16 | 
 17 | class NodeBase(object):
 18 |     def __init__(self, debugName=None, inputs=None, scope=None, tensor_size=None, op_type='UnSpecified', attributes='', node=None):
 19 |         # TODO; Specify a __slots__ for this class or potentially
 20 |         # used namedtuple instead
 21 |         self.node = node
 22 |         self.debugName = debugName
 23 |         self.inputs = inputs
 24 |         self.tensor_size = tensor_size
 25 |         self.kind = op_type
 26 |         self.attributes = attributes
 27 |         self.scope = scope
 28 | 
 29 |     def __repr__(self):
 30 |         repr = []
 31 |         repr.append(str(type(self)))
 32 |         for m in dir(self):
 33 |             if '__' not in m:
 34 |                 repr.append(m + ': ' + str(getattr(self, m)) + str(type(getattr(self, m))))
 35 |         return '\n'.join(repr) + '\n\n'
 36 | 
 37 | 
 38 | class NodePy(NodeBase):
 39 |     def __init__(self, node_cpp, valid_methods):
 40 |         super(NodePy, self).__init__(node_cpp)
 41 |         valid_methods = valid_methods[:]
 42 |         self.inputs = []
 43 | 
 44 |         for m in valid_methods:
 45 |             if m == 'inputs' or m == 'outputs':
 46 |                 list_of_node = list(getattr(node_cpp, m)())
 47 |                 io_unique_names = []
 48 |                 io_tensor_sizes = []
 49 |                 for n in list_of_node:
 50 |                     io_unique_names.append(n.debugName())
 51 |                     if n.isCompleteTensor():
 52 |                         io_tensor_sizes.append(n.type().sizes())
 53 |                     else:
 54 |                         io_tensor_sizes.append(None)
 55 | 
 56 |                 setattr(self, m, io_unique_names)
 57 |                 setattr(self, m + 'tensor_size', io_tensor_sizes)
 58 | 
 59 |             else:
 60 |                 setattr(self, m, getattr(node_cpp, m)())
 61 | 
 62 | 
 63 | class NodePyIO(NodePy):
 64 |     def __init__(self, node_cpp, input_or_output=None):
 65 |         super(NodePyIO, self).__init__(node_cpp, methods_IO)
 66 |         try:
 67 |             tensor_size = node_cpp.type().sizes()
 68 |         except RuntimeError:
 69 |             tensor_size = [1, ]  # fail when constant model is used.
 70 |         self.tensor_size = tensor_size
 71 |         # Kind attribute string is purely descriptive and will be shown
 72 |         # in detailed information for the node in TensorBoard's graph plugin.
 73 |         #
 74 |         # NodePyOP nodes get this from their kind() method.
 75 |         self.kind = 'Parameter'
 76 |         if input_or_output:
 77 |             self.input_or_output = input_or_output
 78 |             self.kind = 'IO Node'
 79 | 
 80 | 
 81 | class NodePyOP(NodePy):
 82 |     def __init__(self, node_cpp):
 83 |         super(NodePyOP, self).__init__(node_cpp, methods_OP)
 84 |         # Replace single quote which causes strange behavior in TensorBoard
 85 |         # TODO: See if we can remove this in the future
 86 |         self.attributes = str({k: node_cpp[k] for k in node_cpp.attributeNames()}).replace("'", ' ')
 87 |         self.kind = node_cpp.kind()
 88 | 
 89 | 
 90 | class GraphPy(object):
 91 |     """Helper class to convert torch.nn.Module to GraphDef proto and visualization
 92 |     with TensorBoard.
 93 | 
 94 |     GraphDef generation operates in two passes:
 95 | 
 96 |     In the first pass, all nodes are read and saved to two lists.
 97 |     One list is for input/output nodes (nodes_io), which only have inbound
 98 |     or outbound connections, but not both. Another list is for internal
 99 |     operator nodes (nodes_op). The first pass also saves all scope name
100 |     appeared in the nodes in scope_name_appeared list for later processing.
101 | 
102 |     In the second pass, scope names are fully applied to all nodes.
103 |     debugNameToScopedName is a mapping from a node's ID to its fully qualified
104 |     scope name. e.g. Net1/Linear[0]/1. Unfortunately torch.jit doesn't have
105 |     totally correct scope output, so this is nontrivial. The function
106 |     populate_namespace_from_OP_to_IO and find_common_root are used to
107 |     assign scope name to a node based on the connection between nodes
108 |     in a heuristic kind of way. Bookkeeping is done with shallowest_scope_name
109 |     and scope_name_appeared.
110 |     """
111 |     def __init__(self):
112 |         self.nodes_op = []
113 |         self.nodes_io = OrderedDict()
114 |         self.unique_name_to_scoped_name = {}
115 |         self.shallowest_scope_name = 'default'
116 |         self.scope_name_appeared = []
117 | 
118 |     def append(self, x):
119 |         if isinstance(x, NodePyIO):
120 |             self.nodes_io[x.debugName] = x
121 |         if isinstance(x, NodePyOP):
122 |             self.nodes_op.append(x)
123 | 
124 |     def printall(self):
125 |         print('all nodes')
126 |         for node in self.nodes_op:
127 |             print(node)
128 |         for key in self.nodes_io:
129 |             print(self.nodes_io[key])
130 | 
131 |     def find_common_root(self):
132 |         for fullscope in self.scope_name_appeared:
133 |             if fullscope:
134 |                 self.shallowest_scope_name = fullscope.split('/')[0]
135 | 
136 |     def populate_namespace_from_OP_to_IO(self):
137 |         for node in self.nodes_op:
138 |             for node_output, outputSize in zip(node.outputs, node.outputstensor_size):
139 |                 self.scope_name_appeared.append(node.scopeName)
140 |                 self.nodes_io[node_output] = NodeBase(node_output,
141 |                                                       node.inputs,
142 |                                                       node.scopeName,
143 |                                                       outputSize,
144 |                                                       op_type=node.kind,
145 |                                                       node=node,
146 |                                                       attributes=node.attributes)
147 | 
148 |         self.find_common_root()
149 | 
150 |         for node in self.nodes_op:
151 |             for input_node_id in node.inputs:
152 |                 self.unique_name_to_scoped_name[input_node_id] = node.scopeName + '/' + input_node_id
153 | 
154 |         for key, node in self.nodes_io.items():
155 |             if type(node) == NodeBase:
156 |                 self.unique_name_to_scoped_name[key] = node.scope + '/' + node.debugName
157 |             if hasattr(node, 'input_or_output'):
158 |                 self.unique_name_to_scoped_name[key] = node.input_or_output + '/' + node.debugName
159 | 
160 |             if hasattr(node, 'scope') and node.scope is not None:
161 |                 self.unique_name_to_scoped_name[key] = node.scope + '/' + node.debugName
162 |                 if node.scope == '' and self.shallowest_scope_name:
163 |                     self.unique_name_to_scoped_name[node.debugName] = self.shallowest_scope_name + '/' + node.debugName
164 | 
165 |         # replace name
166 |         for key, node in self.nodes_io.items():
167 |             self.nodes_io[key].inputs = [self.unique_name_to_scoped_name[node_input_id] for node_input_id in node.inputs]
168 |             if node.debugName in self.unique_name_to_scoped_name:
169 |                 self.nodes_io[key].debugName = self.unique_name_to_scoped_name[node.debugName]
170 | 
171 | 
172 | def parse(graph, trace, args=None, omit_useless_nodes=True):
173 |     """This method parses an optimized PyTorch model graph and produces
174 |     a list of nodes and node stats for eventual conversion to TensorBoard
175 |     protobuf format.
176 | 
177 |     Args:
178 |       graph (PyTorch module): The model graph to be parsed.
179 |       trace (PyTorch JIT TracedModule): The model trace to be parsed.
180 |       args (tuple): input tensor[s] for the model.
181 |       omit_useless_nodes (boolean): Whether to remove nodes from the graph.
182 |     """
183 |     n_inputs = len(args)
184 | 
185 |     scope = {}
186 |     nodes_py = GraphPy()
187 |     for node in graph.inputs():
188 |         if omit_useless_nodes:
189 |             if len(node.uses()) == 0:  # number of user of the node (= number of outputs/ fanout)
190 |                 continue
191 | 
192 |         if node.type().kind() != CLASSTYPE_KIND:
193 |             nodes_py.append(NodePyIO(node, 'input'))
194 | 
195 |     attr_to_scope = dict()
196 |     for node in graph.nodes():
197 |         if node.kind() == GETATTR_KIND:
198 |             attr_name = node.s('name')
199 |             parent = node.input().node()
200 |             if parent.kind() == GETATTR_KIND:  # If the parent node is not the top-level "self" node
201 |                 parent_attr_name = parent.s('name')
202 |                 parent_scope = attr_to_scope[parent_attr_name]
203 |                 attr_scope = parent_scope.split('/')[-1]
204 |                 attr_to_scope[attr_name] = '{}/{}.{}'.format(parent_scope, attr_scope, attr_name)
205 |             else:
206 |                 attr_to_scope[attr_name] = '__module.{}'.format(attr_name)
207 |             # We don't need classtype nodes; scope will provide this information
208 |             if node.output().type().kind() != CLASSTYPE_KIND:
209 |                 node_py = NodePyOP(node)
210 |                 node_py.scopeName = attr_to_scope[attr_name]
211 |                 nodes_py.append(node_py)
212 |         else:
213 |             nodes_py.append(NodePyOP(node))
214 | 
215 |     for i, node in enumerate(graph.outputs()):  # Create sink nodes for output ops
216 |         node_py = NodePyIO(node, 'output')
217 |         node_py.debugName = "output.{}".format(i + 1)
218 |         node_py.inputs = [node.debugName()]
219 |         nodes_py.append(node_py)
220 | 
221 |     def parse_traced_name(module_name):
222 |         prefix = 'TracedModule['
223 |         suffix = ']'
224 |         if module_name.startswith(prefix) and module_name.endswith(suffix):
225 |             module_name = module_name[len(prefix):-len(suffix)]
226 |         return module_name
227 | 
228 |     alias_to_name = dict()
229 |     base_name = parse_traced_name(trace._name)
230 |     for name, module in trace.named_modules(prefix='__module'):
231 |         mod_name = parse_traced_name(module._name)
232 |         attr_name = name.split('.')[-1]
233 |         alias_to_name[name] = '{}[{}]'.format(mod_name, attr_name)
234 | 
235 |     for node in nodes_py.nodes_op:
236 |         module_aliases = node.scopeName.split('/')
237 |         replacements = [
238 |             alias_to_name[alias]
239 |             if alias in alias_to_name
240 |             else alias.split('.')[-1]
241 |             for alias in module_aliases
242 |         ]
243 |         node.scopeName = base_name
244 |         if any(replacements):
245 |             node.scopeName += '/' + '/'.join(replacements)
246 | 
247 |     nodes_py.populate_namespace_from_OP_to_IO()
248 |     return nodes_py
249 | 
250 | 
251 | def build_graph(model, args):
252 |     try:
253 |         trace = torch.jit.trace(model, args)
254 |         graph = trace.graph
255 |         torch._C._jit_pass_inline(graph)
256 |     except RuntimeError as e:
257 |         print(e)
258 |         print('Error occurs, No graph saved')
259 |         raise e
260 | 
261 |     list_of_nodes = parse(graph, trace, args)
262 |     return graph, list_of_nodes.nodes_io
263 | 


--------------------------------------------------------------------------------
/examples/resnetv2/model.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | #os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
  4 | #os.environ["RUNFILES_DIR"] = "/usr/local/share/plaidml"
  5 | #os.environ["PLAIDML_NATIVE_PATH"] = "/usr/local/lib/libplaidml.dylib"
  6 | 
  7 | import keras
  8 | from keras.layers import Dense, Conv2D, BatchNormalization, Activation
  9 | from keras.layers import AveragePooling2D, Input, Flatten
 10 | from keras.optimizers import Adam
 11 | from keras.callbacks import ModelCheckpoint, LearningRateScheduler
 12 | from keras.callbacks import ReduceLROnPlateau
 13 | from keras.preprocessing.image import ImageDataGenerator
 14 | from keras.regularizers import l2
 15 | from keras.models import Model
 16 | from keras.datasets import cifar10
 17 | import numpy as np
 18 | import deepkit
 19 | 
 20 | experiment = deepkit.experiment()
 21 | 
 22 | experiment.add_label('resnet', 'keras')
 23 | 
 24 | # Training parameters
 25 | batch_size = 128  # orig paper trained all networks with batch_size=128
 26 | epochs = 200
 27 | data_augmentation = False
 28 | num_classes = 10
 29 | 
 30 | # Subtracting pixel mean improves accuracy
 31 | subtract_pixel_mean = True
 32 | 
 33 | # Model parameter
 34 | # ----------------------------------------------------------------------------
 35 | #           |      | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
 36 | # Model     |  n   | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
 37 | #           |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
 38 | # ----------------------------------------------------------------------------
 39 | # ResNet20  | 3 (2)| 92.16     | 91.25     | -----     | -----     | 35 (---)
 40 | # ResNet32  | 5(NA)| 92.46     | 92.49     | NA        | NA        | 50 ( NA)
 41 | # ResNet44  | 7(NA)| 92.50     | 92.83     | NA        | NA        | 70 ( NA)
 42 | # ResNet56  | 9 (6)| 92.71     | 93.03     | 93.01     | NA        | 90 (100)
 43 | # ResNet110 |18(12)| 92.65     | 93.39+-.16| 93.15     | 93.63     | 165(180)
 44 | # ResNet164 |27(18)| -----     | 94.07     | -----     | 94.54     | ---(---)
 45 | # ResNet1001| (111)| -----     | 92.39     | -----     | 95.08+-.14| ---(---)
 46 | # ---------------------------------------------------------------------------
 47 | n = 3
 48 | 
 49 | # Model version
 50 | # Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)
 51 | version = 1
 52 | 
 53 | # Computed depth from supplied model parameter n
 54 | if version == 1:
 55 |     depth = n * 6 + 2
 56 | elif version == 2:
 57 |     depth = n * 9 + 2
 58 | 
 59 | # Model name, depth and version
 60 | model_type = 'ResNet%dv%d' % (depth, version)
 61 | 
 62 | # Load the CIFAR10 data.
 63 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
 64 | 
 65 | # Input image dimensions.
 66 | input_shape = x_train.shape[1:]
 67 | 
 68 | # Normalize data.
 69 | x_train = x_train.astype('float32') / 255
 70 | x_test = x_test.astype('float32') / 255
 71 | 
 72 | # If subtract pixel mean is enabled
 73 | if subtract_pixel_mean:
 74 |     x_train_mean = np.mean(x_train, axis=0)
 75 |     x_train -= x_train_mean
 76 |     x_test -= x_train_mean
 77 | 
 78 | print('x_train shape:', x_train.shape)
 79 | print(x_train.shape[0], 'train samples')
 80 | print(x_test.shape[0], 'test samples')
 81 | print('y_train shape:', y_train.shape)
 82 | 
 83 | # Convert class vectors to binary class matrices.
 84 | y_train = keras.utils.to_categorical(y_train, num_classes)
 85 | y_test = keras.utils.to_categorical(y_test, num_classes)
 86 | 
 87 | 
 88 | def lr_schedule(epoch):
 89 |     """Learning Rate Schedule
 90 | 
 91 |     Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
 92 |     Called automatically every epoch as part of callbacks during training.
 93 | 
 94 |     # Arguments
 95 |         epoch (int): The number of epochs
 96 | 
 97 |     # Returns
 98 |         lr (float32): learning rate
 99 |     """
100 |     lr = 1e-3
101 |     if epoch > 180:
102 |         lr *= 0.5e-3
103 |     elif epoch > 160:
104 |         lr *= 1e-3
105 |     elif epoch > 120:
106 |         lr *= 1e-2
107 |     elif epoch > 80:
108 |         lr *= 1e-1
109 |     print('Learning rate: ', lr)
110 |     return lr
111 | 
112 | 
113 | def resnet_layer(inputs,
114 |                  num_filters=16,
115 |                  kernel_size=3,
116 |                  strides=1,
117 |                  activation='relu',
118 |                  batch_normalization=True,
119 |                  conv_first=True):
120 |     """2D Convolution-Batch Normalization-Activation stack builder
121 | 
122 |     # Arguments
123 |         inputs (tensor): input tensor from input image or previous layer
124 |         num_filters (int): Conv2D number of filters
125 |         kernel_size (int): Conv2D square kernel dimensions
126 |         strides (int): Conv2D square stride dimensions
127 |         activation (string): activation name
128 |         batch_normalization (bool): whether to include batch normalization
129 |         conv_first (bool): conv-bn-activation (True) or
130 |             bn-activation-conv (False)
131 | 
132 |     # Returns
133 |         x (tensor): tensor as input to the next layer
134 |     """
135 |     conv = Conv2D(num_filters,
136 |                   kernel_size=kernel_size,
137 |                   strides=strides,
138 |                   padding='same',
139 |                   kernel_initializer='he_normal',
140 |                   kernel_regularizer=l2(1e-4))
141 | 
142 |     x = inputs
143 |     if conv_first:
144 |         x = conv(x)
145 |         if batch_normalization:
146 |             x = BatchNormalization()(x)
147 |         if activation is not None:
148 |             x = Activation(activation)(x)
149 |     else:
150 |         if batch_normalization:
151 |             x = BatchNormalization()(x)
152 |         if activation is not None:
153 |             x = Activation(activation)(x)
154 |         x = conv(x)
155 |     return x
156 | 
157 | 
158 | def resnet_v1(input_shape, depth, num_classes=10):
159 |     """ResNet Version 1 Model builder [a]
160 | 
161 |     Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
162 |     Last ReLU is after the shortcut connection.
163 |     At the beginning of each stage, the feature map size is halved (downsampled)
164 |     by a convolutional layer with strides=2, while the number of filters is
165 |     doubled. Within each stage, the layers have the same number filters and the
166 |     same number of filters.
167 |     Features maps sizes:
168 |     stage 0: 32x32, 16
169 |     stage 1: 16x16, 32
170 |     stage 2:  8x8,  64
171 |     The Number of parameters is approx the same as Table 6 of [a]:
172 |     ResNet20 0.27M
173 |     ResNet32 0.46M
174 |     ResNet44 0.66M
175 |     ResNet56 0.85M
176 |     ResNet110 1.7M
177 | 
178 |     # Arguments
179 |         input_shape (tensor): shape of input image tensor
180 |         depth (int): number of core convolutional layers
181 |         num_classes (int): number of classes (CIFAR10 has 10)
182 | 
183 |     # Returns
184 |         model (Model): Keras model instance
185 |     """
186 |     if (depth - 2) % 6 != 0:
187 |         raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
188 |     # Start model definition.
189 |     num_filters = 16
190 |     num_res_blocks = int((depth - 2) / 6)
191 | 
192 |     inputs = Input(shape=input_shape)
193 |     x = resnet_layer(inputs=inputs)
194 |     # Instantiate the stack of residual units
195 |     for stack in range(3):
196 |         for res_block in range(num_res_blocks):
197 |             strides = 1
198 |             if stack > 0 and res_block == 0:  # first layer but not first stack
199 |                 strides = 2  # downsample
200 |             y = resnet_layer(inputs=x,
201 |                              num_filters=num_filters,
202 |                              strides=strides)
203 |             y = resnet_layer(inputs=y,
204 |                              num_filters=num_filters,
205 |                              activation=None)
206 |             if stack > 0 and res_block == 0:  # first layer but not first stack
207 |                 # linear projection residual shortcut connection to match
208 |                 # changed dims
209 |                 x = resnet_layer(inputs=x,
210 |                                  num_filters=num_filters,
211 |                                  kernel_size=1,
212 |                                  strides=strides,
213 |                                  activation=None,
214 |                                  batch_normalization=False)
215 |             x = keras.layers.add([x, y])
216 |             x = Activation('relu')(x)
217 |         num_filters *= 2
218 | 
219 |     # Add classifier on top.
220 |     # v1 does not use BN after last shortcut connection-ReLU
221 |     x = AveragePooling2D(pool_size=8)(x)
222 |     y = Flatten()(x)
223 |     outputs = Dense(num_classes,
224 |                     activation='softmax',
225 |                     kernel_initializer='he_normal')(y)
226 | 
227 |     # Instantiate model.
228 |     model = Model(inputs=inputs, outputs=outputs)
229 |     return model
230 | 
231 | 
232 | def resnet_v2(input_shape, depth, num_classes=10):
233 |     """ResNet Version 2 Model builder [b]
234 | 
235 |     Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
236 |     bottleneck layer
237 |     First shortcut connection per layer is 1 x 1 Conv2D.
238 |     Second and onwards shortcut connection is identity.
239 |     At the beginning of each stage, the feature map size is halved (downsampled)
240 |     by a convolutional layer with strides=2, while the number of filter maps is
241 |     doubled. Within each stage, the layers have the same number filters and the
242 |     same filter map sizes.
243 |     Features maps sizes:
244 |     conv1  : 32x32,  16
245 |     stage 0: 32x32,  64
246 |     stage 1: 16x16, 128
247 |     stage 2:  8x8,  256
248 | 
249 |     # Arguments
250 |         input_shape (tensor): shape of input image tensor
251 |         depth (int): number of core convolutional layers
252 |         num_classes (int): number of classes (CIFAR10 has 10)
253 | 
254 |     # Returns
255 |         model (Model): Keras model instance
256 |     """
257 |     if (depth - 2) % 9 != 0:
258 |         raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
259 |     # Start model definition.
260 |     num_filters_in = 16
261 |     num_res_blocks = int((depth - 2) / 9)
262 | 
263 |     inputs = Input(shape=input_shape)
264 |     # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
265 |     x = resnet_layer(inputs=inputs,
266 |                      num_filters=num_filters_in,
267 |                      conv_first=True)
268 | 
269 |     # Instantiate the stack of residual units
270 |     for stage in range(3):
271 |         for res_block in range(num_res_blocks):
272 |             activation = 'relu'
273 |             batch_normalization = True
274 |             strides = 1
275 |             if stage == 0:
276 |                 num_filters_out = num_filters_in * 4
277 |                 if res_block == 0:  # first layer and first stage
278 |                     activation = None
279 |                     batch_normalization = False
280 |             else:
281 |                 num_filters_out = num_filters_in * 2
282 |                 if res_block == 0:  # first layer but not first stage
283 |                     strides = 2    # downsample
284 | 
285 |             # bottleneck residual unit
286 |             y = resnet_layer(inputs=x,
287 |                              num_filters=num_filters_in,
288 |                              kernel_size=1,
289 |                              strides=strides,
290 |                              activation=activation,
291 |                              batch_normalization=batch_normalization,
292 |                              conv_first=False)
293 |             y = resnet_layer(inputs=y,
294 |                              num_filters=num_filters_in,
295 |                              conv_first=False)
296 |             y = resnet_layer(inputs=y,
297 |                              num_filters=num_filters_out,
298 |                              kernel_size=1,
299 |                              conv_first=False)
300 |             if res_block == 0:
301 |                 # linear projection residual shortcut connection to match
302 |                 # changed dims
303 |                 x = resnet_layer(inputs=x,
304 |                                  num_filters=num_filters_out,
305 |                                  kernel_size=1,
306 |                                  strides=strides,
307 |                                  activation=None,
308 |                                  batch_normalization=False)
309 |             x = keras.layers.add([x, y])
310 | 
311 |         num_filters_in = num_filters_out
312 | 
313 |     # Add classifier on top.
314 |     # v2 has BN-ReLU before Pooling
315 |     x = BatchNormalization()(x)
316 |     x = Activation('relu')(x)
317 |     x = AveragePooling2D(pool_size=8)(x)
318 |     y = Flatten()(x)
319 |     outputs = Dense(num_classes,
320 |                     activation='softmax',
321 |                     kernel_initializer='he_normal')(y)
322 | 
323 |     # Instantiate model.
324 |     model = Model(inputs=inputs, outputs=outputs)
325 |     return model
326 | 
327 | 
328 | if version == 2:
329 |     model = resnet_v2(input_shape=input_shape, depth=depth)
330 | else:
331 |     model = resnet_v1(input_shape=input_shape, depth=depth)
332 | 
333 | model.compile(loss='categorical_crossentropy',
334 |               optimizer=Adam(),
335 |               metrics=['accuracy'])
336 | model.summary()
337 | print(model_type)
338 | 
339 | # Prepare model model saving directory.
340 | save_dir = os.path.join(os.getcwd(), 'saved_models')
341 | model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
342 | if not os.path.isdir(save_dir):
343 |     os.makedirs(save_dir)
344 | filepath = os.path.join(save_dir, model_name)
345 | 
346 | # Prepare callbacks for model saving and for learning rate adjustment.
347 | checkpoint = ModelCheckpoint(filepath=filepath,
348 |                              monitor='val_acc',
349 |                              verbose=1,
350 |                              save_best_only=True)
351 | 
352 | lr_scheduler = LearningRateScheduler(lr_schedule)
353 | 
354 | lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
355 |                                cooldown=0,
356 |                                patience=5,
357 |                                min_lr=0.5e-6)
358 | 
359 | experiment.watch_keras_model(model)
360 | callbacks = [checkpoint, lr_reducer, lr_scheduler, experiment.create_keras_callback()]
361 | 
362 | # Run training, with or without data augmentation.
363 | if not data_augmentation:
364 |     print('Not using data augmentation.')
365 |     model.fit(x_train, y_train,
366 |               batch_size=batch_size,
367 |               epochs=epochs,
368 |               validation_data=(x_test, y_test),
369 |               shuffle=True,
370 |               callbacks=callbacks)
371 | else:
372 |     print('Using real-time data augmentation.')
373 |     # This will do preprocessing and realtime data augmentation:
374 |     datagen = ImageDataGenerator(
375 |         # set input mean to 0 over the dataset
376 |         featurewise_center=False,
377 |         # set each sample mean to 0
378 |         samplewise_center=False,
379 |         # divide inputs by std of dataset
380 |         featurewise_std_normalization=False,
381 |         # divide each input by its std
382 |         samplewise_std_normalization=False,
383 |         # apply ZCA whitening
384 |         zca_whitening=False,
385 |         # epsilon for ZCA whitening
386 |         zca_epsilon=1e-06,
387 |         # randomly rotate images in the range (deg 0 to 180)
388 |         rotation_range=0,
389 |         # randomly shift images horizontally
390 |         width_shift_range=0.1,
391 |         # randomly shift images vertically
392 |         height_shift_range=0.1,
393 |         # set range for random shear
394 |         shear_range=0.,
395 |         # set range for random zoom
396 |         zoom_range=0.,
397 |         # set range for random channel shifts
398 |         channel_shift_range=0.,
399 |         # set mode for filling points outside the input boundaries
400 |         fill_mode='nearest',
401 |         # value used for fill_mode = "constant"
402 |         cval=0.,
403 |         # randomly flip images
404 |         horizontal_flip=True,
405 |         # randomly flip images
406 |         vertical_flip=False,
407 |         # set rescaling factor (applied before any other transformation)
408 |         rescale=None,
409 |         # set function that will be applied on each input
410 |         preprocessing_function=None,
411 |         # image data format, either "channels_first" or "channels_last"
412 |         data_format=None,
413 |         # fraction of images reserved for validation (strictly between 0 and 1)
414 |         validation_split=0.0)
415 | 
416 |     # Compute quantities required for featurewise normalization
417 |     # (std, mean, and principal components if ZCA whitening is applied).
418 |     datagen.fit(x_train)
419 | 
420 |     # Fit the model on the batches generated by datagen.flow().
421 |     model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
422 |                         validation_data=(x_test, y_test),
423 |                         epochs=epochs, verbose=1, workers=4,
424 |                         callbacks=callbacks)
425 | 
426 | # Score trained model.
427 | scores = model.evaluate(x_test, y_test, verbose=1)
428 | print('Test loss:', scores[0])
429 | print('Test accuracy:', scores[1])
430 | 


--------------------------------------------------------------------------------
/deepkit/pytorch.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import re
  3 | from struct import pack
  4 | from typing import Dict, Optional
  5 | 
  6 | import PIL.Image
  7 | import numpy as np
  8 | 
  9 | import deepkit.experiment
 10 | import deepkit.debugger
 11 | from deepkit.pytorch_graph import build_graph
 12 | from deepkit.utils import array_to_img
 13 | from deepkit.utils.image import get_layer_vis_square, get_image_tales, make_image_from_dense
 14 | 
 15 | blacklist_attributes = {'weight', 'dump_patches'}
 16 | 
 17 | 
 18 | def extract_attributes(module):
 19 |     res = {}
 20 |     for attr in dir(module):
 21 |         if attr in blacklist_attributes: continue
 22 |         if attr.startswith('_'): continue
 23 |         val = getattr(module, attr)
 24 |         if not isinstance(val, (str, bool, int, float, list, tuple)):
 25 |             continue
 26 |         res[attr] = val
 27 | 
 28 |     return res
 29 | 
 30 | 
 31 | scope_name_prog = re.compile(r'^([a-zA-Z0-9_\-]+)/')
 32 | short_name_prog = re.compile(r'\[([a-zA-Z0-9_]+)\]')
 33 | is_variable = re.compile(r'/([a-zA-Z0-9_]+(?:\.[0-9]+)?)$')
 34 | 
 35 | 
 36 | def get_layer_id(name: str):
 37 |     """
 38 |     Takes a name like 'ResNet/Conv2d[conv1]/1504' and converts it to a shorter version
 39 | 
 40 |     Examples
 41 |         1. 'ResNet/Sequential[layer1]/BasicBlock[1]/Conv2d[conv2]/1658'
 42 |         -> layer1.1.conv2/1657
 43 |         2. 'ResNet/Sequential[layer2]/BasicBlock[0]/BatchNorm2d[bn1]/1714'
 44 |         -> layer2.0.bn1/1714
 45 |         3. 'ResNet/Sequential[layer1]/BasicBlock[0]/input.4'
 46 |         -> layer1.0/input.4
 47 |         4. 'input/input.1'
 48 |         -> input-1
 49 |         5. 'output/output.1'
 50 |         -> output-1
 51 |     """
 52 |     res = short_name_prog.findall(name)
 53 |     var = is_variable.search(name)
 54 |     if not res:
 55 |         return name
 56 |     if var:
 57 |         return '.'.join(res) + '/' + var.group(1)
 58 |     return '.'.join(res)
 59 | 
 60 | 
 61 | def get_scope_id(name: str):
 62 |     """
 63 |     Takes a name like 'ResNet/Conv2d[conv1]/1504' and converts it to
 64 |     its scope variant, which could be later used for `named_modules` method.
 65 |     Examples
 66 |         1. 'ResNet/Sequential[layer1]/BasicBlock[1]/Conv2d[conv2]/1658'
 67 |         -> Resnet.layer1.1.conv2
 68 |         2. 'ResNet/Sequential[layer2]/BasicBlock[0]/BatchNorm2d[bn1]/1714'
 69 |         -> Resnet.layer2.0.bn1
 70 |         2. 'ResNet/Sequential[layer2]/BasicBlock[0]/BatchNorm2d[bn1]/input.2'
 71 |         -> Resnet.layer2.0.bn1
 72 |         3. 'ResNet/Sequential[layer1]/BasicBlock[0]/input.4'
 73 |         -> Resnet.layer1.0
 74 |         3. 'ResNet/x.1'
 75 |         -> Resnet.x
 76 |     """
 77 |     res = short_name_prog.findall(name)
 78 |     if not res:
 79 |         # no groups mean its something like Resnet/x.2, which we normalize to Resnet
 80 |         return name.split('/')[0]
 81 | 
 82 |     scope = scope_name_prog.findall(name)
 83 | 
 84 |     return scope[0] + '.' + ('.'.join(res))
 85 | 
 86 | 
 87 | def get_pytorch_graph(net, inputs):
 88 |     names_from_id = dict()
 89 |     nodes_from_id = dict()
 90 |     names_from_debug = dict()
 91 |     scopes_from_debug = dict()
 92 |     names_to_scope = dict()
 93 |     scope_nodes = dict()
 94 |     # names_to_scope = dict()
 95 | 
 96 |     container_names = dict()
 97 |     known_modules_map = dict()
 98 |     known_modules_name_map = dict()
 99 | 
100 |     torch_graph, torch_nodes = build_graph(net, inputs)
101 | 
102 |     for name, module in net.named_modules(prefix=type(net).__name__):
103 |         known_modules_map[module] = name
104 |         known_modules_name_map[name] = module
105 | 
106 |     def get_parent(name, go_up=1) -> str:
107 |         return '.'.join(name.split('.')[:go_up * -1])
108 | 
109 |     for node in torch_nodes.values():
110 |         if node.kind == 'prim::Constant': continue
111 |         if node.kind == 'prim::GetAttr': continue
112 |         layer_id = get_layer_id(node.debugName)
113 |         scope_id = get_scope_id(node.debugName)
114 | 
115 |         if node.kind == 'prim::ListConstruct':
116 |             # if that list constructor has only inputs of the same scope, ignore it
117 |             all_scope = True
118 |             for input in node.inputs:
119 |                 if get_scope_id(input) != scope_id:
120 |                     all_scope = False
121 |                     break
122 |             if all_scope:
123 |                 continue
124 | 
125 |         # if node.kind == 'aten::t': continue
126 | 
127 |         names_from_id[layer_id] = node.debugName
128 |         nodes_from_id[layer_id] = node
129 |         names_from_debug[node.debugName] = layer_id
130 |         scopes_from_debug[node.debugName] = scope_id
131 |         names_to_scope[layer_id] = scopes_from_debug[node.debugName]
132 |         if scope_id not in scope_nodes:
133 |             scope_nodes[scope_id] = [layer_id]
134 |         else:
135 |             scope_nodes[scope_id].append(layer_id)
136 | 
137 |     edges = dict()
138 |     edges_internal = dict()
139 | 
140 |     for node in torch_nodes.values():
141 |         if node.debugName not in names_from_debug: continue
142 |         layer_id = names_from_debug[node.debugName]
143 |         scope_id = scopes_from_debug[node.debugName]
144 | 
145 |         # print(node.debugName, '=>', layer_id, short_layer_id, node.kind, node.tensor_size)
146 |         edges[layer_id] = set()
147 | 
148 |         for input in node.inputs:
149 |             if layer_id not in edges_internal: edges_internal[layer_id] = []
150 |             edges_internal[layer_id].append(input)
151 | 
152 |             # filter unknown nodes
153 |             if input not in names_from_debug: continue
154 | 
155 |             # reference to itself is forbidden
156 |             if layer_id == names_from_debug[input]: continue
157 | 
158 |             # reference to its scope is forbidden
159 |             if scope_id == names_from_debug[input]: continue
160 | 
161 |             # print('   outgoing', names_from_debug[input], scopes_from_debug[input], input,
162 |             #       nodes_from_id[names_from_debug[input]].kind)
163 |             # this node points out of itself, so create an edge
164 |             edge_to = names_from_debug[input]
165 |             edges[layer_id].add(edge_to)
166 | 
167 |     deepkit_nodes = []
168 | 
169 |     nodes_names_to_display = set()
170 | 
171 |     def collect_nodes_to_display(inputs):
172 |         for input in inputs:
173 |             if input not in nodes_names_to_display:
174 |                 nodes_names_to_display.add(input)
175 |                 if input in edges:
176 |                     collect_nodes_to_display(edges[input])
177 | 
178 |     def find_outputs(name: str, outputs: set):
179 |         kind = nodes_from_id[name].kind
180 | 
181 |         if kind == 'IO Node' and len(edges[name]) != 1:
182 |             # an IO node with multiple inputs is probably correct already
183 |             outputs.add(name)
184 |             return
185 | 
186 |         if kind == 'IO Node' or kind == 'prim::TupleConstruct':
187 |             # resolve inputs
188 |             for input in edges[name]:
189 |                 find_outputs(input, outputs)
190 |         else:
191 |             outputs.add(name)
192 | 
193 |     for name in edges.copy().keys():
194 |         if name.startswith('output/'):
195 |             collect_nodes_to_display(edges[name])
196 | 
197 |             # resolve first to first nodes with available shape, and then use those as output
198 |             # this is necessary since tuple outputs come via prim::TupleConstruct and no shape.
199 |             found_outputs = set()
200 |             find_outputs(name, found_outputs)
201 |             i = 0
202 |             # print('found new outputs', name, found_outputs)
203 | 
204 |             for output in found_outputs:
205 |                 i += 1
206 |                 new_name = 'output/output.' + str(i)
207 |                 edges[new_name] = edges[name]
208 |                 nodes_from_id[new_name] = nodes_from_id[output]
209 |                 names_to_scope[new_name] = ''
210 | 
211 |                 nodes_names_to_display.add(new_name)
212 | 
213 |     activation_functions = set(map(str.lower, [
214 |         'ReLU6',
215 |         'LogSigmoid',
216 |         'LeakyReLU',
217 |         'MultiheadAttention',
218 |         'elu', 'hardshrink', 'hardtanh', 'leaky_relu', 'logsigmoid', 'prelu',
219 |         'rrelu', 'relu',
220 |         'sigmoid', 'elu', 'celu', 'selu', 'glu', 'gelu', 'softplus', 'softshrink', 'softsign',
221 |         'tanh', 'tanhshrink',
222 |         'softmin', 'softmax', 'softmax2d', 'log_softmax', 'LogSoftmax',
223 |         'AdaptiveLogSoftmaxWithLoss'
224 |     ]))
225 | 
226 |     input_names = []
227 |     output_names = []
228 | 
229 |     record_map = dict()
230 |     for name in nodes_names_to_display:
231 |         inputs = edges[name] if name in edges else []
232 |         # for [name, inputs] in edges.items():
233 |         torch_node = nodes_from_id[name]
234 |         scope_name = names_to_scope[name]
235 |         if not name:
236 |             raise Exception('No name given')
237 | 
238 |         node_type = 'layer'
239 |         scope_id = scope_name
240 |         recordable = False
241 | 
242 |         # filterer_inputs = []
243 |         if name.startswith('input/'):
244 |             recordable = True
245 |             node_type = 'input'
246 |             input_names.append(name)
247 | 
248 |         if name.startswith('output/'):
249 |             recordable = True
250 |             node_type = 'output'
251 |             output_names.append(name)
252 | 
253 |         # for input in inputs:
254 |         #     # second_parent = get_parent(names_to_scope[input], 2)
255 |         #     # if second_parent and not scope_name.startswith(second_parent):
256 |         #     #     continue
257 |         #     if input.startswith('input/input'):
258 |         #         filterer_inputs.append(input)
259 |         #         continue
260 |         #     if input in edges: filterer_inputs.append(input)
261 | 
262 |         attributes = {}
263 |         node_sub_type = ''
264 |         node_label = name
265 | 
266 |         if node_type != 'output':
267 |             if scope_name and scope_name in scope_nodes and len(
268 |                     scope_nodes[scope_name]) == 1 and scope_name in known_modules_name_map:
269 |                 # this node is at the same time a module(and thus scope), since it only has one node.
270 |                 recordable = True
271 |                 record_map[scope_name] = name
272 |                 node_label = scope_name
273 |                 module = known_modules_name_map[scope_name]
274 |                 node_sub_type = type(module).__name__
275 |                 scope_id = get_parent(scope_name)
276 |                 attributes = extract_attributes(module)
277 |             else:
278 |                 if str(torch_node.kind).startswith('aten::'):
279 |                     node_type = 'op'
280 |                     node_sub_type = torch_node.kind.replace('aten::', '').strip('_')
281 | 
282 |                 if str(torch_node.kind).startswith('prim::'):
283 |                     node_type = 'primitive'
284 |                     node_sub_type = torch_node.kind.replace('prim::', '').strip('_')
285 | 
286 |             if node_sub_type.lower() in activation_functions:
287 |                 node_type = 'activation'
288 |                 node_sub_type = node_sub_type
289 | 
290 |         # attributes['torch.debugName'] = torch_node.debugName
291 |         # attributes['torch.kind'] = torch_node.kind
292 |         # attributes['torch.inputs'] = ', '.join(torch_node.inputs)
293 | 
294 |         # source = str(torch_node.node.debugName).split(' # ')[1].strip() \
295 |         #     if hasattr(torch_node.node, 'debugName') and ' # ' in str(torch_node.node.debugName) else None
296 | 
297 |         node = {
298 |             'id': name,
299 |             'label': node_label,
300 |             'type': node_type,
301 |             'subType': node_sub_type,
302 |             # 'source': source,
303 |             'input': list(inputs),
304 |             'attributes': attributes,
305 |             'recordable': recordable,
306 |             'scope': scope_id.replace('.', '/'),
307 |             'shape': torch_node.tensor_size,
308 |         }
309 |         deepkit_nodes.append(node)
310 | 
311 |     scopes = []
312 |     for name, module in known_modules_name_map.items():
313 |         # skip modules that are already added as nodes
314 |         if name in scope_nodes and len(scope_nodes[name]) == 1:
315 |             continue
316 | 
317 |         scope_id = name.replace('.', '/')
318 |         record_map[name] = scope_id
319 | 
320 |         # the root scope is not recordable. For that we have global input and outputs
321 |         recordable = '/' in scope_id
322 | 
323 |         scope = {
324 |             'id': scope_id,
325 |             'label': scope_id,
326 |             'subType': type(module).__name__,
327 |             'recordable': recordable,
328 |             'attributes': extract_attributes(module)
329 |         }
330 |         scopes.append(scope)
331 | 
332 |     graph = {
333 |         'nodes': deepkit_nodes,
334 |         'scopes': scopes,
335 |     }
336 | 
337 |     return graph, record_map, input_names, output_names
338 | 
339 | 
340 | class TorchDebugger:
341 |     def __init__(self, debugger: deepkit.debugger.DebuggerManager, net, graph_name: str, resolve_map):
342 |         self.known_modules_map = dict()
343 |         self.known_modules_name_map = dict()
344 |         self.debugger = debugger
345 | 
346 |         for name, module in net.named_modules(prefix=type(net).__name__):
347 |             self.known_modules_map[module] = name
348 |             self.known_modules_name_map[name] = module
349 | 
350 |         self.net = net
351 |         self.graph_name = graph_name
352 |         self.resolve_map = resolve_map
353 | 
354 |         # contains a map of recording map, names from nodes of the full graph to actual modules
355 |         # this is necessary since we map certain internal nodes to a scope/layer/module.
356 |         self.record_map = dict()
357 |         self.model_input_names = []
358 |         self.model_output_names = []
359 |         self.model_input = None
360 |         self.extract_graph = False
361 | 
362 |         self.fetch_result: Dict[str, deepkit.debugger.DebuggerFetchItem] = dict()
363 |         self.fetch_config: Optional[deepkit.debugger.DebuggerFetchConfig] = None
364 | 
365 |         def root_hook(module, input):
366 |             if self.extract_graph: return
367 |             if self.debugger.active_debug_data_for_this_run: return
368 | 
369 |             if self.model_input is None:
370 |                 self.model_input = input
371 |                 self.extract_graph = True
372 |                 self.record_map, self.model_input_names, self.model_output_names = self.resolve_map(input)
373 |                 self.extract_graph = False
374 |             else:
375 |                 self.debugger.tick()
376 | 
377 |         net.register_forward_pre_hook(root_hook)
378 | 
379 |         self.net.apply(self.register_hook)
380 | 
381 |     def fetch(self, fetch_config: deepkit.debugger.DebuggerFetchConfig) -> Dict[
382 |         str, deepkit.debugger.DebuggerFetchItem]:
383 |         self.fetch_config = fetch_config
384 |         self.fetch_result = dict()
385 | 
386 |         if not self.model_input:
387 |             return self.fetch_result
388 | 
389 |         if len(self.model_input_names) > 1:
390 |             for i, name in enumerate(self.model_input_names):
391 |                 self.send_debug(name, self.net, self.model_input[i])
392 |         elif len(self.model_input_names) == 1:
393 |             self.send_debug(self.model_input_names[0], self.net, self.model_input)
394 | 
395 |         self.net(*self.model_input)
396 | 
397 |         return self.fetch_result
398 | 
399 |     def register_hook(self, module):
400 |         def hook(module, input, output):
401 |             if self.extract_graph: return
402 |             if not self.debugger.active_debug_data_for_this_run:
403 |                 # we don't care about hook calls outside of our debug tracking
404 |                 return
405 | 
406 |             module_id = self.known_modules_map[module]
407 |             node_id = module_id
408 |             if '.' not in module_id:
409 |                 # we are in the root module, so we use that for global output tracking
410 |                 if len(self.model_output_names) > 1:
411 |                     for i, name in enumerate(self.model_output_names):
412 |                         self.send_debug(name, module, output[i])
413 |                 elif len(self.model_output_names) == 1:
414 |                     self.send_debug(self.model_output_names[0], module, output)
415 |             else:
416 |                 # sub node
417 |                 self.send_debug(node_id, module, output)
418 | 
419 |         module.register_forward_hook(hook)
420 | 
421 |     def get_histogram(self, x, tensor):
422 |         h = np.histogram(tensor.cpu().detach().numpy(), bins=20)
423 |         # <version><x><bins><...x><...y>, little endian
424 |         # uint8|Uint32|Uint16|...Float32|...Uint32
425 |         # B|L|H|...f|...L
426 |         return pack('<BIH', 1, int(x), h[0].size) + h[1].astype('<f').tobytes() + h[0].astype('<I').tobytes()
427 | 
428 |     def get_debug_data(self, x, module, output):
429 |         image = None
430 |         activations = None
431 |         if isinstance(output, tuple) and len(output) > 0:
432 |             output = output[0]
433 | 
434 |         if hasattr(output, 'shape'):
435 |             activations = self.get_histogram(x, output)
436 | 
437 |             if len(output.shape) > 1:
438 |                 # outputs come in batch usually, so pick first
439 |                 sample = output[0].cpu().detach().numpy()
440 |                 if len(sample.shape) == 3:
441 |                     if sample.shape[0] == 3:
442 |                         image = PIL.Image.fromarray(get_layer_vis_square(sample))
443 |                     else:
444 |                         image = PIL.Image.fromarray(get_image_tales(sample))
445 |                 elif len(sample.shape) > 1:
446 |                     image = PIL.Image.fromarray(get_layer_vis_square(sample))
447 |                 elif len(sample.shape) == 1:
448 |                     if sample.shape[0] == 1:
449 |                         # we got a single number
450 |                         output = sample[0]
451 |                     else:
452 |                         image = make_image_from_dense(sample)
453 |         # elif isinstance(output[0], (float, str, int)):
454 |         #     image = output
455 | 
456 |         whistogram = None
457 |         bhistogram = None
458 | 
459 |         if hasattr(module, 'weight') and module.weight is not None:
460 |             whistogram = self.get_histogram(x, module.weight)
461 | 
462 |         if hasattr(module, 'bias') and module.bias is not None:
463 |             bhistogram = self.get_histogram(x, module.bias)
464 | 
465 |         output_rep = None
466 |         if isinstance(image, PIL.Image.Image):
467 |             output_rep = image
468 |         elif isinstance(output, (float, np.floating)):
469 |             output_rep = float(output)
470 |         elif isinstance(output, (int, np.integer)):
471 |             output_rep = int(output)
472 | 
473 |         return output_rep, activations, whistogram, bhistogram
474 | 
475 |     def send_debug(self, node_id, module, output):
476 |         if node_id in self.record_map:
477 |             node_id = self.record_map[node_id]
478 |         node_id = self.graph_name + ':' + node_id
479 | 
480 |         if self.fetch_config.needs_fetch(node_id):
481 |             output_rep, ahistogram, whistogram, bhistogram = self.get_debug_data(
482 |                 self.fetch_config.x, module, output
483 |             )
484 | 
485 |             self.fetch_result[node_id] = deepkit.debugger.DebuggerFetchItem(
486 |                 name=node_id,
487 |                 output=output_rep,
488 |                 ahistogram=ahistogram,
489 |                 whistogram=whistogram,
490 |                 bhistogram=bhistogram,
491 |             )
492 | 


--------------------------------------------------------------------------------
/deepkit/client.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import inspect
  3 | import json
  4 | import os
  5 | import sys
  6 | import threading
  7 | from asyncio import Future
  8 | import datetime
  9 | from enum import Enum
 10 | from typing import Dict, Optional
 11 | 
 12 | import numpy as np
 13 | import websockets
 14 | from rx.subject import BehaviorSubject
 15 | 
 16 | import deepkit.globals
 17 | from deepkit.home import get_home_config
 18 | from deepkit.model import FolderLink
 19 | 
 20 | 
 21 | def is_in_directory(filepath, directory):
 22 |     return os.path.realpath(filepath).startswith(os.path.realpath(directory))
 23 | 
 24 | 
 25 | class ApiError(Exception):
 26 |     pass
 27 | 
 28 | 
 29 | def json_converter(obj):
 30 |     if isinstance(obj, np.integer):
 31 |         return int(obj)
 32 |     elif isinstance(obj, np.floating):
 33 |         return float(obj)
 34 |     elif isinstance(obj, np.float):
 35 |         return float(obj)
 36 |     elif isinstance(obj, np.ndarray):
 37 |         return obj.tolist()
 38 |     elif isinstance(obj, datetime.datetime):
 39 |         # we assume all datetime instances are UTC
 40 |         return obj.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
 41 |     else:
 42 |         return str(obj)
 43 | 
 44 | 
 45 | class JobStatus(Enum):
 46 |     done = 150  # when all tasks are done
 47 |     aborted = 200  # when at least one task aborted
 48 |     failed = 250  # when at least one task failed
 49 |     crashed = 300  # when at least one task crashed
 50 | 
 51 | 
 52 | class Client(threading.Thread):
 53 |     connection: websockets.WebSocketClientProtocol
 54 | 
 55 |     def __init__(self, project: Optional[str] = None,
 56 |                  account: Optional[str] = None,
 57 |                  try_pick_up=False,
 58 |                  parent_experiment=None,
 59 |                  silent=False):
 60 |         self.connected = BehaviorSubject(False)
 61 |         self.project = project
 62 |         self.account = account
 63 |         self.parent_experiment = parent_experiment
 64 |         self.silent = silent
 65 | 
 66 |         self.host = os.environ.get('DEEPKIT_HOST', '127.0.0.1')
 67 |         self.socket_path = os.environ.get('DEEPKIT_SOCKET', None)
 68 |         self.ssl = os.environ.get('DEEPKIT_SSL', '0') is '1'
 69 |         self.port = int(os.environ.get('DEEPKIT_PORT', '8960'))
 70 | 
 71 |         self.job_token = None
 72 |         self.job_id = None
 73 | 
 74 |         if try_pick_up:
 75 |             # is set by Deepkit cli
 76 |             self.job_token = os.environ.get('DEEPKIT_JOB_ACCESSTOKEN', None)
 77 |             self.job_id = os.environ.get('DEEPKIT_JOB_ID', None)
 78 | 
 79 |         # is set by deepkit.login()
 80 |         self.token = os.environ.get('DEEPKIT_ACCESSTOKEN', None)
 81 | 
 82 |         self.result_status = None
 83 | 
 84 |         self.message_id = 0
 85 |         self.callbacks: Dict[int, asyncio.Future] = {}
 86 |         self.subscriber: Dict[int, any] = {}
 87 |         self.stopping = False
 88 |         self.queue = []
 89 |         self.controllers = {}
 90 |         self.patches = {}
 91 |         self.offline = False
 92 |         self.connections = 0
 93 |         self.lock = threading.Lock()
 94 |         threading.Thread.__init__(self)
 95 |         self.daemon = True
 96 |         self.loop = asyncio.new_event_loop()
 97 |         self.start()
 98 | 
 99 |     def is_connected(self):
100 |         return self.connected.value
101 | 
102 |     def run(self):
103 |         self.connecting = self.loop.create_future()
104 |         self.loop.run_forever()
105 | 
106 |     def connect(self):
107 |         asyncio.run_coroutine_threadsafe(self._connect(), self.loop)
108 | 
109 |     def connect_anon(self):
110 |         asyncio.run_coroutine_threadsafe(self._connect_anon(), self.loop).result()
111 | 
112 |     def shutdown(self):
113 |         if self.offline: return
114 |         promise = asyncio.run_coroutine_threadsafe(self.stop_and_sync(), self.loop)
115 |         promise.result()
116 |         if not self.connection.closed:
117 |             raise Exception('Connection still active')
118 |         self.loop.stop()
119 | 
120 |     async def stop_and_sync(self):
121 |         self.stopping = True
122 | 
123 |         if deepkit.utils.in_self_execution() or self.result_status:
124 |             # only when we are in self execution do we set status, time stamps etc
125 |             # otherwise the CLI is doing that and the server. Or when
126 |             # the experiment set result_state explicitly.
127 | 
128 |             # done = 150, //when all tasks are done
129 |             # aborted = 200, //when at least one task aborted
130 |             # failed = 250, //when at least one task failed
131 |             # crashed = 300, //when at least one task crashed
132 |             self.patches['status'] = 150
133 |             self.patches['ended'] = datetime.datetime.utcnow()
134 |             self.patches['tasks.main.ended'] = datetime.datetime.utcnow()
135 | 
136 |             # done = 500,
137 |             # aborted = 550,
138 |             # failed = 600,
139 |             # crashed = 650,
140 |             self.patches['tasks.main.status'] = 500
141 |             self.patches['tasks.main.instances.0.ended'] = datetime.datetime.utcnow()
142 | 
143 |             # done = 500,
144 |             # aborted = 550,
145 |             # failed = 600,
146 |             # crashed = 650,
147 |             self.patches['tasks.main.instances.0.status'] = 500
148 | 
149 |             if hasattr(sys, 'last_value'):
150 |                 if isinstance(sys.last_value, KeyboardInterrupt):
151 |                     self.patches['status'] = 200
152 |                     self.patches['tasks.main.status'] = 550
153 |                     self.patches['tasks.main.instances.0.status'] = 550
154 |                 else:
155 |                     self.patches['status'] = 300
156 |                     self.patches['tasks.main.status'] = 650
157 |                     self.patches['tasks.main.instances.0.status'] = 650
158 | 
159 |             if self.result_status:
160 |                 self.patches['status'] = self.result_status.value
161 | 
162 |         while len(self.patches) > 0 or len(self.queue) > 0:
163 |             await asyncio.sleep(0.15)
164 | 
165 |         await self.connection.close()
166 | 
167 |     def register_controller(self, name: str, controller):
168 |         return asyncio.run_coroutine_threadsafe(self._register_controller(name, controller), self.loop)
169 | 
170 |     async def _register_controller(self, name: str, controller):
171 |         self.controllers[name] = controller
172 | 
173 |         async def handle_peer_message(message, done):
174 |             if message['type'] == 'error':
175 |                 done()
176 |                 del self.controllers[name]
177 |                 raise Exception('Register controller error: ' + message['error'])
178 | 
179 |             if message['type'] == 'ack':
180 |                 pass
181 | 
182 |             if message['type'] == 'peerController/message':
183 |                 data = message['data']
184 | 
185 |                 if not hasattr(controller, data['action']):
186 |                     error = f"Requested action {message['action']} not available in {name}"
187 |                     print(error, file=sys.stderr)
188 |                     await self._message({
189 |                         'name': 'peerController/message',
190 |                         'controllerName': name,
191 |                         'clientId': message['clientId'],
192 |                         'data': {'type': 'error', 'id': data['id'], 'stack': None, 'entityName': '@error:default',
193 |                                  'error': error}
194 |                     }, no_response=True)
195 | 
196 |                 if data['name'] == 'actionTypes':
197 |                     parameters = []
198 | 
199 |                     i = 0
200 |                     for arg in inspect.getfullargspec(getattr(controller, data['action'])).args:
201 |                         parameters.append({
202 |                             'type': 'any',
203 |                             'name': '#' + str(i)
204 |                         })
205 |                         i += 1
206 | 
207 |                     await self._message({
208 |                         'name': 'peerController/message',
209 |                         'controllerName': name,
210 |                         'clientId': message['clientId'],
211 |                         'data': {
212 |                             'type': 'actionTypes/result',
213 |                             'id': data['id'],
214 |                             'parameters': parameters,
215 |                             'returnType': {'type': 'any', 'name': 'result'}
216 |                         }
217 |                     }, no_response=True)
218 | 
219 |                 if data['name'] == 'action':
220 |                     try:
221 |                         res = await getattr(controller, data['action'])(*data['args'])
222 | 
223 |                         await self._message({
224 |                             'name': 'peerController/message',
225 |                             'controllerName': name,
226 |                             'clientId': message['clientId'],
227 |                             'data': {
228 |                                 'type': 'next/json',
229 |                                 'id': data['id'],
230 |                                 'encoding': {'name': 'r', 'type': 'any'},
231 |                                 'next': res,
232 |                             }
233 |                         }, no_response=True)
234 |                     except Exception as e:
235 |                         await self._message({
236 |                             'name': 'peerController/message',
237 |                             'controllerName': name,
238 |                             'clientId': message['clientId'],
239 |                             'data': {'type': 'error', 'id': data['id'], 'stack': None, 'entityName': '@error:default',
240 |                                      'error': str(e)}
241 |                         }, no_response=True)
242 | 
243 |         def subscriber(message, on_done):
244 |             self.loop.create_task(handle_peer_message(message, on_done))
245 | 
246 |         await self._subscribe({
247 |             'name': 'peerController/register',
248 |             'controllerName': name,
249 |         }, subscriber)
250 | 
251 |         class Controller:
252 |             def __init__(self, client):
253 |                 self.client = client
254 | 
255 |             def stop(self):
256 |                 self.client._message({
257 |                     'name': 'peerController/unregister',
258 |                     'controllerName': name,
259 |                 }, no_response=True)
260 | 
261 |         return Controller(self)
262 | 
263 |     async def _action(self, controller: str, action: str, args=None, lock=True, allow_in_shutdown=False):
264 |         if args is None:
265 |             args = []
266 | 
267 |         if lock: await self.connecting
268 |         if self.offline: return
269 |         if self.stopping and not allow_in_shutdown: raise Exception('In shutdown: actions disallowed')
270 | 
271 |         if not controller: raise Exception('No controller given')
272 |         if not action: raise Exception('No action given')
273 | 
274 |         # print('> action', action, threading.current_thread().name)
275 |         res = await self._message({
276 |             'name': 'action',
277 |             'controller': controller,
278 |             'action': action,
279 |             'args': args,
280 |             'timeout': 60
281 |         }, lock=lock)
282 | 
283 |         # print('< action', action)
284 | 
285 |         if res['type'] == 'next/json':
286 |             return res['next'] if 'next' in res else None
287 | 
288 |         if res['type'] == 'error':
289 |             print(res, file=sys.stderr)
290 |             raise ApiError('API Error: ' + str(res['error']))
291 | 
292 |         raise ApiError(f"Invalid action type '{res['type']}'. Not implemented")
293 | 
294 |     def app_action_threadsafe(self, action: str, args=None) -> Future:
295 |         if args is None: args = []
296 |         return asyncio.run_coroutine_threadsafe(self._action('app', action, args), self.loop)
297 | 
298 |     async def job_action(self, action: str, args=None):
299 |         return await self._action('job', action, args)
300 | 
301 |     def job_action_threadsafe(self, action: str, args=None) -> Future:
302 |         """
303 |         This method is non-blocking and every try to block-wait for an answers means
304 |         script execution stops when connection is broken (offline training entirely impossible).
305 |         So, we just schedule the call and return a Future, which the user can subscribe to.
306 |         """
307 |         if args is None: args = []
308 |         return asyncio.run_coroutine_threadsafe(self._action('job', action, args), self.loop)
309 | 
310 |     async def _subscribe(self, message, subscriber):
311 |         await self.connecting
312 | 
313 |         self.message_id += 1
314 |         message['id'] = self.message_id
315 | 
316 |         message_id = self.message_id
317 | 
318 |         def on_done():
319 |             del self.subscriber[message_id]
320 | 
321 |         def on_incoming_message(incoming_message):
322 |             subscriber(incoming_message, on_done)
323 | 
324 |         self.subscriber[self.message_id] = on_incoming_message
325 |         self.queue.append(message)
326 | 
327 |     def _create_message(self, message: dict, lock=True, no_response=False) -> dict:
328 |         self.message_id += 1
329 |         message['id'] = self.message_id
330 |         if not no_response:
331 |             self.callbacks[self.message_id] = self.loop.create_future()
332 | 
333 |         return message
334 | 
335 |     async def _message(self, message, lock=True, no_response=False):
336 |         if lock: await self.connecting
337 | 
338 |         message = self._create_message(message, no_response=no_response)
339 |         self.queue.append(message)
340 | 
341 |         if no_response:
342 |             return
343 | 
344 |         return await self.callbacks[self.message_id]
345 | 
346 |     def patch(self, path: str, value: any):
347 |         if self.offline: return
348 |         if self.stopping: return
349 | 
350 |         self.patches[path] = value
351 | 
352 |     async def send_messages(self, connection):
353 |         while not connection.closed:
354 |             try:
355 |                 q = self.queue[:]
356 |                 for m in q:
357 |                     try:
358 |                         j = json.dumps(m, default=json_converter)
359 |                     except TypeError as e:
360 |                         print('Could not send message since JSON error', e, m, file=sys.stderr)
361 |                         continue
362 |                     await connection.send(j)
363 |                     self.queue.remove(m)
364 |             except Exception as e:
365 |                 print("Failed sending, exit send_messages", file=sys.stderr)
366 |                 raise e
367 | 
368 |             if len(self.patches) > 0:
369 |                 # we have to send first all messages/actions out
370 |                 # before sending patches, as most of the time
371 |                 # patches are based on previously created entities,
372 |                 # so we need to make sure those entities are created
373 |                 # first before sending any patches.
374 |                 # print('patches', self.patches)
375 |                 try:
376 |                     send = self.patches.copy()
377 |                     await connection.send(json.dumps({
378 |                         'name': 'action',
379 |                         'controller': 'job',
380 |                         'action': 'patchJob',
381 |                         'args': [
382 |                             send
383 |                         ],
384 |                         'timeout': 60
385 |                     }, default=json_converter))
386 | 
387 |                     for i in send.keys():
388 |                         if self.patches[i] == send[i]:
389 |                             del self.patches[i]
390 |                 except websockets.exceptions.ConnectionClosed:
391 |                     return
392 |                 except ApiError:
393 |                     print("Patching failed. Syncing job data disabled.", file=sys.stderr)
394 |                     return
395 | 
396 |             await asyncio.sleep(0.5)
397 | 
398 |     async def handle_messages(self, connection):
399 |         while not connection.closed:
400 |             try:
401 |                 res = json.loads(await connection.recv())
402 |             except websockets.exceptions.ConnectionClosedError:
403 |                 # we need reconnect
404 |                 break
405 |             except websockets.exceptions.ConnectionClosedOK:
406 |                 # we closed on purpose, so no reconnect necessary
407 |                 return
408 | 
409 |             if res and 'id' in res:
410 |                 if res['id'] in self.subscriber:
411 |                     self.subscriber[res['id']](res)
412 | 
413 |                 if res['id'] in self.callbacks:
414 |                     self.callbacks[res['id']].set_result(res)
415 |                     del self.callbacks[res['id']]
416 | 
417 |         if not self.stopping:
418 |             self.log("Deepkit: lost connection. reconnect ...")
419 |             self.connecting = self.loop.create_future()
420 |             self.connected.on_next(False)
421 |             self.loop.create_task(self._connect())
422 | 
423 |     async def _connected(self, id: str, token: str):
424 |         try:
425 |             if self.socket_path:
426 |                 self.connection = await websockets.unix_connect(self.socket_path)
427 |             else:
428 |                 ws = 'wss' if self.ssl else 'ws'
429 |                 url = f"{ws}://{self.host}:{self.port}"
430 |                 self.connection = await websockets.connect(url)
431 |         except Exception as e:
432 |             # try again later
433 |             self.log('Unable to connect', e)
434 |             await asyncio.sleep(1)
435 |             self.loop.create_task(self._connect())
436 |             return
437 | 
438 |         self.loop.create_task(self.handle_messages(self.connection))
439 |         # we don't use send_messages() since this would send all queue/patches
440 |         # which would lead to permission issues when we're not first authenticated
441 | 
442 |         if token:
443 |             message = self._create_message({
444 |                 'name': 'authenticate',
445 |                 'token': {
446 |                     'id': 'job',
447 |                     'token': token,
448 |                     'job': id
449 |                 }
450 |             }, lock=False)
451 | 
452 |             await self.connection.send(json.dumps(message, default=json_converter))
453 | 
454 |             res = await self.callbacks[message['id']]
455 |             if not res['result'] or res['result'] is not True:
456 |                 raise Exception('Job token invalid')
457 | 
458 |         self.loop.create_task(self.send_messages(self.connection))
459 | 
460 |         self.connecting.set_result(True)
461 |         if self.connections > 0:
462 |             self.log("Deepkit: Reconnected.")
463 | 
464 |         self.connected.on_next(True)
465 |         self.connections += 1
466 | 
467 |     async def _connect_anon(self):
468 |         ws = 'wss' if self.ssl else 'ws'
469 |         url = f"{ws}://{self.host}:{self.port}"
470 |         self.connection = await websockets.connect(url)
471 |         self.loop.create_task(self.handle_messages(self.connection))
472 |         self.loop.create_task(self.send_messages(self.connection))
473 | 
474 |         self.connecting.set_result(True)
475 |         self.connected.on_next(True)
476 |         self.connections += 1
477 | 
478 |     async def _connect(self):
479 |         # we want to restart with a empty queue, so authentication happens always first
480 |         queue_copy = self.queue[:]
481 |         self.queue = []
482 | 
483 |         if self.job_token:
484 |             await self._connected(self.job_id, self.job_token)
485 |             return
486 | 
487 |         try:
488 |             link: Optional[FolderLink] = None
489 | 
490 |             user_token = self.token
491 |             account_name = 'none'
492 | 
493 |             if not user_token:
494 |                 config = get_home_config()
495 |                 # when no user_token is given (via deepkit.login() for example)
496 |                 # we need to find the host, port, token from the user config in ~/.deepkit/config
497 |                 if not self.account and not self.project:
498 |                     # find both, start with
499 |                     link = config.get_folder_link_of_directory(sys.path[0])
500 |                     account_config = config.get_account_for_id(link.accountId)
501 |                 elif self.account and not self.project:
502 |                     account_config = config.get_account_for_name(self.account)
503 |                 else:
504 |                     # default to first account configured
505 |                     account_config = config.get_first_account()
506 | 
507 |                 account_name = account_config.name
508 |                 self.host = account_config.host
509 |                 self.port = account_config.port
510 |                 self.ssl = account_config.ssl
511 |                 user_token = account_config.token
512 | 
513 |             ws = 'wss' if self.ssl else 'ws'
514 |             try:
515 |                 url = f"{ws}://{self.host}:{self.port}"
516 |                 self.connection = await websockets.connect(url)
517 |             except Exception as e:
518 |                 self.offline = True
519 |                 print(f"Deepkit: App not started or server not reachable. Monitoring disabled. {e}", file=sys.stderr)
520 |                 self.connecting.set_result(False)
521 |                 return
522 | 
523 |             self.loop.create_task(self.handle_messages(self.connection))
524 |             self.loop.create_task(self.send_messages(self.connection))
525 | 
526 |             res = await self._message({
527 |                 'name': 'authenticate',
528 |                 'token': {
529 |                     'id': 'user',
530 |                     'token': user_token
531 |                 }
532 |             }, lock=False)
533 |             if not res['result']:
534 |                 raise Exception('Login invalid')
535 | 
536 |             project_name = ''
537 |             if link:
538 |                 project_name = link.name
539 |                 projectId = link.projectId
540 |             else:
541 |                 if not self.project:
542 |                     raise Exception('No project defined. Please use project="project-name" '
543 |                                     'to specify which project to use.')
544 | 
545 |                 project = await self._action('app', 'getProjectForPublicName', [self.project], lock=False)
546 | 
547 |                 if not project:
548 |                     raise Exception(
549 |                         f'No project found for name {self.project}. Make sure it exists before using it. '
550 |                         f'Do you use the correct account? (used {account_name})')
551 |                 project_name = project['name']
552 |                 projectId = project['id']
553 | 
554 |             job = await self._action('app', 'createJob', [projectId, self.parent_experiment],
555 |                                      lock=False)
556 | 
557 |             prefix = "Sub experiment" if self.parent_experiment else "Experiment"
558 |             self.log(f"{prefix} #{job['number']} created in project {project_name} using account {account_name}")
559 | 
560 |             deepkit.globals.loaded_job_config = job['config']['config']
561 |             self.job_token = await self._action('app', 'getJobAccessToken', [job['id']], lock=False)
562 |             self.job_id = job['id']
563 | 
564 |             # todo, implement re-authentication, so we don't have to drop the active connection
565 |             await self.connection.close()
566 |             await self._connected(self.job_id, self.job_token)
567 |         except Exception as e:
568 |             self.connecting.set_exception(e)
569 | 
570 |         self.queue = queue_copy + self.queue
571 | 
572 |     def log(self, *message: str):
573 |         if not self.silent: print(*message)
574 | 


--------------------------------------------------------------------------------
/deepkit/keras_tf.py:
--------------------------------------------------------------------------------
  1 | import inspect
  2 | import sys
  3 | from os.path import dirname
  4 | from struct import pack
  5 | from typing import Dict, Optional, List
  6 | 
  7 | import PIL.Image
  8 | import numpy as np
  9 | 
 10 | if 'keras' in sys.modules:
 11 |     import keras
 12 | else:
 13 |     import tensorflow.keras as keras
 14 | 
 15 | import tensorflow as tf
 16 | 
 17 | import deepkit.debugger
 18 | from deepkit.utils.image import get_layer_vis_square, get_image_tales, make_image_from_dense
 19 | 
 20 | if 'keras' in sys.modules:
 21 |     from keras import Model
 22 | else:
 23 |     from tensorflow.keras import Model
 24 | 
 25 | 
 26 | def count_params(weights):
 27 |     return int(sum(np.prod(p.shape.as_list()) for p in weights))
 28 | 
 29 | 
 30 | def get_tf_shape_as_list(tf_shape_dim):
 31 |     return list(map(lambda x: x.size, list(tf_shape_dim)))
 32 | 
 33 | 
 34 | def extract_model_graph(model):
 35 |     def extract_attributes(layer):
 36 |         attrs = [
 37 |             # InputLayer
 38 |             'input_shape', 'batch_size', 'dtype', 'sparse', 'ragged',
 39 | 
 40 |             # conv, LocallyConnected1D
 41 |             'rank', 'filters', 'kernel_size', 'strides', 'padding', 'data_format', 'dilation_rate',
 42 |             'use_bias',
 43 |             'kernel_initializer', 'bias_initializer', 'kernel_regularizer', 'bias_regularizer',
 44 |             'activity_regularizer', 'kernel_constraint', 'bias_constraint',
 45 | 
 46 |             # pooling
 47 |             'pool_size', 'strides', 'padding', 'data_format',
 48 |             'pool_function',
 49 | 
 50 |             # RNN
 51 |             'cell', 'return_sequences', 'return_state', 'go_backwards', 'stateful', 'unroll', 'time_major',
 52 |             # RNNCell
 53 |             'units', 'recurrent_activation', 'use_bias', 'kernel_initializer', 'recurrent_initializer',
 54 |             'bias_initializer', 'unit_forget_bias', 'kernel_regularizer', 'recurrent_regularizer',
 55 |             'bias_regularizer', 'kernel_constraint', 'recurrent_constraint', 'bias_constraint',
 56 |             'dropout', 'recurrent_dropout', 'implementation',
 57 | 
 58 |             # Embedding
 59 |             'input_dim', 'output_dim',
 60 |             'embeddings_initializer', 'embeddings_regularizer', 'activity_regularizer',
 61 |             'embeddings_constraint', 'mask_zero', 'input_length', 'fused',
 62 | 
 63 |             # Merge
 64 |             'axes', 'normalize',
 65 | 
 66 |             # Noise
 67 |             'stddev', 'rate', 'noise_shape',
 68 | 
 69 |             # BatchNormalization
 70 |             'momentum', 'epsilon', 'center', 'scale',
 71 |             'beta_initializer', 'gamma_initializer', 'moving_mean_initializer', 'moving_variance_initializer',
 72 |             'beta_regularizer', '', 'gamma_regularizer', 'beta_constraint', 'gamma_constraint', 'renorm',
 73 |             'virtual_batch_size', 'adjustment'
 74 | 
 75 |                                   'rate', 'noise_shape',  # Dropout
 76 |             'data_format',  # Flatten
 77 |             'target_shape',  # Reshape
 78 |             'dims',  # Permute
 79 |             'n',  # RepeatVector
 80 |             'function',  # Lambda
 81 |             'l1', 'l2',  # ActivityRegularization
 82 |             'mask_value',  # Masking
 83 |         ]
 84 |         res = {}
 85 | 
 86 |         def normalize_value(name, v):
 87 |             if inspect.isfunction(v):
 88 |                 return v.__name__
 89 | 
 90 |             if isinstance(v, (str, int, float, bool)):
 91 |                 return v
 92 | 
 93 |             if isinstance(v, (list, tuple)):
 94 |                 return str(v)
 95 | 
 96 |             if type(v).__name__ != 'type':
 97 |                 # todo, if `cell` for RNN we probably want to extract those information as well
 98 |                 return type(v).__name__
 99 | 
100 |             return str(v)
101 | 
102 |         for attr in attrs:
103 |             if hasattr(layer, attr):
104 |                 res[attr] = getattr(layer, attr)
105 | 
106 |                 res[attr] = normalize_value(attr, res[attr])
107 | 
108 |         if hasattr(layer, 'activation'):
109 |             if layer.activation:
110 |                 res['activation'] = layer.activation.__name__
111 |                 # todo get action parameters. `alpha`, etc
112 | 
113 |         if hasattr(layer, 'trainable_weights'):
114 |             res['trainable_weights'] = count_params(layer.trainable_weights)
115 |         if hasattr(layer, 'non_trainable_weights'):
116 |             res['non_trainable_weights'] = count_params(layer.non_trainable_weights)
117 | 
118 |         return res
119 | 
120 |     def tensor_name_to_node_name(name: str) -> str:
121 |         return name[0:name.rindex(':')]
122 | 
123 |     def get_parent(name, go_up=1) -> str:
124 |         return '/'.join(name.split('/')[:go_up * -1])
125 | 
126 |     def get_scope_id(name: str):
127 |         """
128 |         Takes a name like 'dense_2/MatMul' and converts it to its scope `dense_2`.
129 |         Examples
130 |             1. 'dense_1/MatMul/ReadVariableOp/resource'
131 |             -> dense_1/MatMul/ReadVariableOp
132 |             2. 'dense_1/MatMul/ReadVariableOp'
133 |             -> dense_1/MatMul
134 |         """
135 |         return dirname(name)
136 | 
137 |     edges = dict()
138 |     nodes = dict()
139 |     names_to_scope = dict()
140 |     scope_nodes = dict()
141 |     input_names = []
142 |     output_names = []
143 |     record_map = dict()
144 | 
145 |     output_tensor = model.outputs[0] if hasattr(model, 'outputs') else model.output
146 |     if not hasattr(output_tensor, 'graph'):
147 |         # only tensorflow has `graph` defined.
148 |         graph = {'nodes': [], 'scopes': []}
149 |         return graph, record_map, input_names
150 | 
151 |     g = output_tensor.graph
152 |     tf_nodes = list(g.as_graph_def(add_shapes=True).node)
153 |     blacklist = {'Placeholder', 'PlaceholderWithDefault', 'Const'}
154 | 
155 |     model_scoped_layer_names = set()
156 |     model_unique_layer_names = set()
157 | 
158 |     def extract_layers(model, scope_id=''):
159 |         scope_prefix = ((scope_id + '/') if scope_id else '')
160 |         for layer in model.layers:
161 |             model_scoped_layer_names.add(scope_prefix + layer.name)
162 |             model_unique_layer_names.add(layer.name)
163 | 
164 |             if isinstance(layer, Model):
165 |                 extract_layers(layer, scope_prefix + layer.name)
166 | 
167 |     extract_layers(model, '')
168 | 
169 |     def get_scoped_name(full_name: str):
170 |         """
171 |         1. 'sequential_1/conv2d_1/convolution/ReadVariableOp'
172 |         => 'sequential_1', 'conv2d_1'
173 |         2. 'conv2d_1/convolution/ReadVariableOp'
174 |         => '', 'conv2d_1'
175 |         3. 'dense_1/MatMul'
176 |         => '', 'dense_1'
177 |         """
178 |         names = full_name.split('/')
179 |         scope = ''
180 |         name = ''
181 | 
182 |         for part in names:
183 |             next_scope = scope + ('/' if scope else '') + name
184 |             next_name = part
185 |             next_full_name = next_scope + ('/' if next_scope else '') + next_name
186 |             if next_full_name not in model_scoped_layer_names:
187 |                 break
188 |             scope = next_scope
189 |             name = next_name
190 | 
191 |         if not scope and not name and names[0] in model_unique_layer_names:
192 |             return '', names[0]
193 | 
194 |         return scope, name
195 | 
196 |     for tensor in model.inputs:
197 |         input_names.append(tensor_name_to_node_name(tensor.name))
198 | 
199 |     for tensor in model.outputs:
200 |         output_names.append(tensor_name_to_node_name(tensor.name))
201 | 
202 |     for node in tf_nodes:
203 |         is_input = node.name in input_names
204 |         if node.op in blacklist and not is_input: continue
205 | 
206 |         nodes[node.name] = node
207 |         scope_id = get_scope_id(node.name)
208 |         names_to_scope[node.name] = scope_id
209 | 
210 |         if scope_id not in scope_nodes:
211 |             scope_nodes[names_to_scope[node.name]] = []
212 | 
213 |         scope_nodes[scope_id].append(node.name)
214 | 
215 |     for node in nodes.values():
216 |         edges[node.name] = set()
217 |         node_scope, node_name = get_scoped_name(node.name)
218 | 
219 |         for input in node.input:
220 |             # filter unknown nodes
221 |             if input not in nodes: continue
222 | 
223 |             input_scope, input_name = get_scoped_name(input)
224 |             if input_name == node_name and node_scope != input_scope:
225 |                 # 'sequential_1/conv2d_1/convolution/ReadVariableOp' points to
226 |                 # its internals at 'conv2d_1/kernel', which are both conv2d_1, but
227 |                 # on different scopes, which mean we don't display `conv2d_1/kernel`, since
228 |                 # its only internals.
229 |                 continue
230 | 
231 |             edges[node.name].add(input)
232 | 
233 |     nodes_names_to_display = set()
234 | 
235 |     def collect_nodes_to_display(inputs):
236 |         for input in inputs:
237 |             if input not in nodes_names_to_display:
238 |                 nodes_names_to_display.add(input)
239 |                 if input in edges:
240 |                     collect_nodes_to_display(edges[input])
241 | 
242 |     dk_nodes = []
243 |     dk_scopes = []
244 | 
245 |     primitive = {'Identity'}
246 | 
247 |     # shows those layers activation nodes.
248 |     activations = {'elu', 'softmax', 'selu', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid',
249 |                    'exponential', 'linear', 'leakyrelu'}
250 | 
251 |     op_names_normalization = {'AddV2': 'add'}
252 | 
253 |     # show as type 'layer' when no `activation` or linear activation has been set. This
254 |     # hides internals of those layers in the graph.
255 |     layers = {'Embedding', 'Flatten', 'Dense', 'Dropout', 'Reshape', 'BatchNormalization', 'UpSampling2D', 'Conv2D'}
256 | 
257 |     for i, output in enumerate(model.outputs):
258 |         name = tensor_name_to_node_name(output.name)
259 |         nodes_names_to_display.add(name)
260 |         collect_nodes_to_display(edges[name])
261 |         record_map[output.name] = name + ':0'
262 | 
263 |         shape = []
264 |         if '_output_shapes' in nodes[name].attr:
265 |             shape = list(map(lambda x: x.size, list(nodes[name].attr['_output_shapes'].list.shape[0].dim)))
266 | 
267 |         dk_nodes.append({
268 |             'id': output.name,
269 |             'label': name,
270 |             'type': 'output',
271 |             'subType': '',
272 |             'input': list(edges[name]),
273 |             'attributes': {},
274 |             'recordable': True,
275 |             'scope': '',
276 |             'shape': shape,
277 |         })
278 | 
279 |     for name in nodes_names_to_display:
280 |         scope_id = names_to_scope[name]
281 |         node_label = name
282 |         node_type = 'op'
283 |         node_sub_type: str = nodes[name].op
284 |         inputs = edges[name] if name in edges else []
285 |         recordable = True
286 |         shape = []
287 |         if '_output_shapes' in nodes[name].attr:
288 |             shape = list(map(lambda x: x.size, list(nodes[name].attr['_output_shapes'].list.shape[0].dim)))
289 | 
290 |         if name in input_names:
291 |             recordable = True
292 |             node_type = 'input'
293 | 
294 |         if name in output_names:
295 |             recordable = True
296 |             node_type = 'output'
297 | 
298 |         if node_sub_type in primitive:
299 |             node_type = 'primitive'
300 | 
301 |         if node_sub_type.lower() in activations:
302 |             node_type = 'activation'
303 | 
304 |         if node_sub_type in op_names_normalization:
305 |             node_sub_type = op_names_normalization[node_sub_type]
306 | 
307 |         if recordable:
308 |             # map names to tensor, which can be later used to fetch the output
309 |             try:
310 |                 g.get_tensor_by_name(name + ':0')
311 |                 record_map[name] = name + ':0'
312 |             except:
313 |                 recordable = False
314 | 
315 |         is_collapsible = node_sub_type != 'Sequential'
316 |         if scope_id and is_collapsible and scope_id in scope_nodes and len(scope_nodes[scope_id]) == 1:
317 |             # the scope has only one item, so collapse it.
318 |             parent_scope_id = get_parent(scope_id)
319 |             if scope_id not in nodes:
320 |                 scope_id = parent_scope_id
321 | 
322 |         # is_collapsible = node_sub_type != 'Sequential'
323 |         # while scope_id and is_collapsible and scope_id in scope_nodes and len(scope_nodes[scope_id]) == 1:
324 |         #     # the scope has only one item, so collapse it.
325 |         #     scope_id = get_parent(scope_id)
326 |         #     if scope_id in nodes:
327 |         #         is_collapsible = nodes[scope_id].op != 'Sequential'
328 |         #         inputs = edges[scope_id]
329 |         #     else:
330 |         #         is_collapsible = False
331 | 
332 |         node = {
333 |             'id': name,
334 |             'label': node_label,
335 |             'type': node_type,
336 |             'subType': node_sub_type,
337 |             'input': list(inputs),
338 |             'attributes': {},
339 |             'recordable': recordable,
340 |             'scope': scope_id,
341 |             'shape': shape,
342 |         }
343 |         dk_nodes.append(node)
344 | 
345 |     def extract_layers(model, scope_id=''):
346 |         if scope_id:
347 |             dk_scopes.append({
348 |                 'id': scope_id,
349 |                 'label': scope_id,
350 |                 'subType': type(model).__name__,
351 |                 'recordable': True,
352 |             })
353 |         scope_prefix = ((scope_id + '/') if scope_id else '')
354 | 
355 |         for layer in model.layers:
356 |             recordable = True
357 |             if hasattr(layer, 'outputs'):
358 |                 recordable = len(layer.outputs) == 1
359 |             else:
360 |                 recordable = layer.output is not None
361 | 
362 |             if recordable:
363 |                 # we track here the actual layer, because it contains the weights/biases correctly
364 |                 tensor = layer.outputs[0] if hasattr(layer, 'outputs') else layer.output
365 |                 # sub tensors must have the layer name as prefix. If this is not the case
366 |                 # it references the wrong tensor. We make here sure the correct sub tensor
367 |                 # is chosen and not one from a shadow/sibling graph.
368 |                 # 1. scope_prefix='', layer=Dense1 and tensor is like 'dense_1/Relu' which is correct
369 |                 # 2. scope_prefix='', layer=Sequential1 and tensor is like 'activation/tanh', but we need 'sequential_1/activation/tanh'
370 |                 # 3. scope_prefix='sequential_1', layer=Dense2, tensor is like 'dense_1/Relu', but we need 'sequential_1/dense_2/Relu'
371 |                 if '/' in tensor.name and not tensor.name.startswith(scope_prefix + layer.name + '/'):
372 |                     try:
373 |                         tensor = g.get_tensor_by_name(scope_prefix + tensor.name)
374 |                     except:
375 |                         tensor = g.get_tensor_by_name(scope_prefix + layer.name + '/' + tensor.name)
376 | 
377 |                 record_map[scope_prefix + layer.name] = {
378 |                     'layer': layer,
379 |                     'tensor': tensor
380 |                 }
381 | 
382 |             node_sub_type = type(layer).__name__
383 |             node_type = 'scope'
384 | 
385 |             if node_sub_type in layers:
386 |                 if not hasattr(layer, 'activation') or layer.activation is None or layer.activation.__name__ == 'linear':
387 |                     # once the layer has a custom activation function, we don't collapse it to a layer type
388 |                     # since that wouldn't be visible in the graph anymore.
389 |                     node_type = 'layer'
390 | 
391 |             dk_scopes.append({
392 |                 'id': scope_prefix + layer.name,
393 |                 'label': layer.name,
394 |                 'type': node_type,
395 |                 'subType': node_sub_type,
396 |                 'attributes': extract_attributes(layer),
397 |                 'recordable': recordable,
398 |                 'shape': layer.output_shape,
399 |             })
400 | 
401 |             if isinstance(layer, Model):
402 |                 extract_layers(layer, scope_prefix + layer.name)
403 | 
404 |     extract_layers(model, '')
405 | 
406 |     graph = {'nodes': dk_nodes, 'scopes': dk_scopes}
407 |     return graph, record_map, input_names
408 | 
409 | 
410 | class TFDebugger:
411 |     def __init__(self, debugger: deepkit.debugger.DebuggerManager, model, model_input, graph_name: str,
412 |                  record_map: dict, is_batch: bool, input_names: List[str]):
413 |         self.debugger = debugger
414 |         self.model = model
415 |         self.model_input = model_input
416 |         self.graph_name = graph_name
417 |         self.input_names = input_names
418 |         self.is_batch = is_batch
419 | 
420 |         # contains a map of recording map, names from nodes of the full graph to actual modules
421 |         # this is necessary since we map certain internal nodes to a scope/layer/module.
422 |         self.record_map = record_map
423 | 
424 |         self.fetch_result: Dict[str, deepkit.debugger.DebuggerFetchItem] = dict()
425 |         self.fetch_config: Optional[deepkit.debugger.DebuggerFetchConfig] = None
426 | 
427 |     def set_input(self, x):
428 |         # resize batches to size 1 if is_batch=True
429 |         if isinstance(x, tf.data.Dataset):
430 |             x = next(iter(x))[0]
431 | 
432 |         if len(self.input_names) == 1:
433 |             self.model_input = np.array([x[0]] if self.is_batch else x)
434 |         else:
435 |             self.model_input = [np.array([v[0]]) if self.is_batch else v for v in x]
436 | 
437 |     def fetch(self, fetch_config: deepkit.debugger.DebuggerFetchConfig) -> Dict[
438 |         str, deepkit.debugger.DebuggerFetchItem]:
439 |         self.fetch_config = fetch_config
440 |         self.fetch_result = dict()
441 | 
442 |         node_names = []
443 |         for name in self.record_map:
444 |             # if name is an input, we need to fetch it directly from the self.model_input
445 |             # otherwise TF crashes with `input_1:0 is both fed and fetched`
446 |             if name in self.input_names:
447 |                 continue
448 | 
449 |             node_id = self.graph_name + ':' + name
450 |             if self.fetch_config.needs_fetch(node_id):
451 |                 node_names.append(name)
452 | 
453 |         if self.model_input is not None:
454 |             if len(self.input_names) > 1:
455 |                 for i, name in enumerate(self.input_names):
456 |                     self._set_item_from_input(i, self.model_input[i])
457 |             elif len(self.input_names) == 1:
458 |                 self._set_item_from_input(0, self.model_input)
459 | 
460 |         if not len(node_names):
461 |             return self.fetch_result
462 | 
463 |         if self.model_input is None:
464 |             return self.fetch_result
465 | 
466 |         data = self.get_image_and_histogram_from_layers(self.fetch_config.x, node_names)
467 | 
468 |         for i, name in enumerate(node_names):
469 |             jpeg, ahistogram = data[i]
470 |             whistogram = None
471 |             bhistogram = None
472 |             tensor_or_layer_dict = self.record_map[name]
473 |             if isinstance(tensor_or_layer_dict, dict):
474 |                 layer = tensor_or_layer_dict['layer']
475 |                 whistogram, bhistogram = self.get_weight_histogram_from_layer(self.fetch_config.x, layer)
476 | 
477 |             node_id = self.graph_name + ':' + name
478 |             self.fetch_result[node_id] = deepkit.debugger.DebuggerFetchItem(
479 |                 name=node_id,
480 |                 output=jpeg,
481 |                 ahistogram=ahistogram,
482 |                 whistogram=whistogram,
483 |                 bhistogram=bhistogram,
484 |             )
485 | 
486 |         return self.fetch_result
487 | 
488 |     def _set_item_from_input(self, index, data):
489 |         name = self.input_names[index]
490 |         node_id = self.graph_name + ':' + name
491 | 
492 |         if not self.fetch_config.needs_fetch(node_id):
493 |             return
494 | 
495 |         jpeg, ahistogram = self._image_and_histogram(self.fetch_config.x, data)
496 |         self.fetch_result[node_id] = deepkit.debugger.DebuggerFetchItem(
497 |             name=node_id,
498 |             output=jpeg,
499 |             ahistogram=ahistogram,
500 |             whistogram=None,
501 |             bhistogram=None,
502 |         )
503 | 
504 |     def get_image_and_histogram_from_layers(self, x, names):
505 |         outputs = []
506 |         output_tensor = self.model.outputs[0] if hasattr(self.model, 'outputs') else self.model.output
507 |         g = output_tensor.graph
508 |         for name in names:
509 |             tensor_name_or_layer_dict = self.record_map[name]
510 |             if isinstance(tensor_name_or_layer_dict, str):
511 |                 tensor = g.get_tensor_by_name(tensor_name_or_layer_dict)
512 |                 outputs.append(tensor)
513 |             else:
514 |                 layer_dict = tensor_name_or_layer_dict
515 |                 outputs.append(layer_dict['tensor'])
516 | 
517 |         inputs = self.model.inputs if hasattr(self.model, 'inputs') else self.model.input
518 | 
519 |         fn = keras.backend.function(inputs, outputs)
520 |         try:
521 |             y = fn(self.model_input)
522 | 
523 |             result = []
524 | 
525 |             for i, _ in enumerate(names):
526 |                 result.append(self._image_and_histogram(x, y[i]))
527 | 
528 |             return result
529 |         except Exception as e:
530 |             print(f"Failed to watch tensor. Input shape: {self.model_input.shape}, outputs={len(outputs)}")
531 |             raise e
532 | 
533 |     def _image_and_histogram(self, x, output):
534 |         image = None
535 |         histogram = None
536 |         if hasattr(output, 'shape'):
537 |             # tf is not batch per default
538 |             sample = np.copy(output)
539 |             shape = output.shape
540 | 
541 |             if self.is_batch:
542 |                 # display only first item in batch
543 |                 sample = np.copy(output[0])
544 |                 shape = output.shape[1:] # first is batch shizzle
545 | 
546 |             if len(shape) == 3:
547 |                 if keras.backend.image_data_format() == 'channels_last':
548 |                     sample = np.transpose(sample, (2, 0, 1))
549 | 
550 |                 if sample.shape[0] == 3:
551 |                     image = PIL.Image.fromarray(get_layer_vis_square(sample))
552 |                 else:
553 |                     image = PIL.Image.fromarray(get_image_tales(sample))
554 |             elif len(shape) > 1:
555 |                 image = PIL.Image.fromarray(get_layer_vis_square(sample))
556 |             elif len(shape) == 1:
557 |                 if shape[0] == 1:
558 |                     # we got a single number
559 |                     output = sample[0]
560 |                 else:
561 |                     image = make_image_from_dense(sample)
562 | 
563 |             h = np.histogram(sample, bins=20)
564 |             histogram = pack('<BIH', 1, int(x), h[0].size) + h[1].astype('<f').tobytes() + h[0].astype('<I').tobytes()
565 | 
566 |         output_rep = None
567 |         if isinstance(image, PIL.Image.Image):
568 |             output_rep = image
569 |         elif isinstance(output, (float, np.floating)):
570 |             output_rep = float(output)
571 |         elif isinstance(output, (int, np.integer)):
572 |             output_rep = int(output)
573 | 
574 |         return output_rep, histogram
575 | 
576 |     def get_weight_histogram_from_layer(self, x, layer):
577 |         layer_weights = layer.get_weights()
578 |         weights = None
579 | 
580 |         if len(layer_weights) > 0:
581 |             h = np.histogram(layer_weights[0], bins=20)
582 |             # <version><x><bins><...x><...y>, little endian
583 |             # uint8|Uint32|Uint16|...Float32|...Uint32
584 |             # B|L|H|...f|...L
585 |             weights = pack('<BIH', 1, int(x), h[0].size) + h[1].astype('<f').tobytes() + h[0].astype('<I').tobytes()
586 | 
587 |         biases = None
588 |         if len(layer_weights) > 1:
589 |             h = np.histogram(layer_weights[1], bins=20)
590 |             biases = pack('<BIH', 1, int(x), h[0].size) + h[1].astype('<f').tobytes() + h[0].astype('<I').tobytes()
591 | 
592 |         return weights, biases
593 | 


--------------------------------------------------------------------------------
/deepkit/utils/image.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2014-2016, NVIDIA CORPORATION.  All rights reserved.
  2 | # BSD 3-clause license
  3 | 
  4 | from __future__ import absolute_import
  5 | from __future__ import print_function
  6 | 
  7 | import io
  8 | import math
  9 | 
 10 | from deepkit.utils import array_to_img
 11 | from deepkit.utils.pilutil import imresize
 12 | 
 13 | import numpy as np
 14 | import PIL.Image
 15 | 
 16 | # Library defaults:
 17 | #   PIL.Image:
 18 | #       size -- (width, height)
 19 | #   np.array:
 20 | #       shape -- (height, width, channels)
 21 | #       range -- [0-255]
 22 | #       dtype -- uint8
 23 | #       channels -- RGB
 24 | #   caffe.datum:
 25 | #       datum.data type -- bytes (uint8)
 26 | #       datum.float_data type -- float32
 27 | #       when decoding images, channels are BGR
 28 | #   DIGITS:
 29 | #       image_dims -- (height, width, channels)
 30 | 
 31 | # List of supported file extensions
 32 | # Use like "if filename.endswith(SUPPORTED_EXTENSIONS)"
 33 | SUPPORTED_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm')
 34 | 
 35 | 
 36 | def pil_image_to_jpeg(image):
 37 |     buffer = io.BytesIO()
 38 | 
 39 |     image.save(buffer, format="PNG", quality=70)
 40 |     return buffer.getvalue()
 41 | 
 42 | 
 43 | def make_image_from_dense(neurons):
 44 |     cols = int(math.ceil(math.sqrt(len(neurons))))
 45 | 
 46 |     even_length = cols * cols
 47 |     diff = even_length - len(neurons)
 48 |     if diff > 0:
 49 |         neurons = np.append(neurons, np.zeros(diff, dtype=neurons.dtype))
 50 | 
 51 |     img = array_to_img(neurons.reshape((1, cols, cols)))
 52 |     img = img.resize((cols * 8, cols * 8), PIL.Image.NEAREST)
 53 | 
 54 |     return img
 55 | 
 56 | 
 57 | def upscale(image, ratio):
 58 |     """
 59 |     return upscaled image array
 60 |     Arguments:
 61 |     image -- a (H,W,C) numpy.ndarray
 62 |     ratio -- scaling factor (>1)
 63 |     """
 64 |     if not isinstance(image, np.ndarray):
 65 |         raise ValueError('Expected ndarray')
 66 |     if ratio < 1:
 67 |         raise ValueError('Ratio must be greater than 1 (ratio=%f)' % ratio)
 68 |     width = int(math.floor(image.shape[1] * ratio))
 69 |     height = int(math.floor(image.shape[0] * ratio))
 70 |     channels = image.shape[2]
 71 |     out = np.ndarray((height, width, channels), dtype=np.uint8)
 72 |     for x, y in np.ndindex((width, height)):
 73 |         out[y, x] = image[int(math.floor(y / ratio)), int(math.floor(x / ratio))]
 74 |     return out
 75 | 
 76 | 
 77 | def resize_image(image, height, width,
 78 |                  channels=None,
 79 |                  resize_mode=None
 80 |                  ):
 81 |     """
 82 |     Resizes an image and returns it as a np.array
 83 |     Arguments:
 84 |     image -- a PIL.Image or numpy.ndarray
 85 |     height -- height of new image
 86 |     width -- width of new image
 87 |     Keyword Arguments:
 88 |     channels -- channels of new image (stays unchanged if not specified)
 89 |     resize_mode -- can be crop, squash, fill or half_crop
 90 |     """
 91 |     if resize_mode is None:
 92 |         resize_mode = 'squash'
 93 |     if resize_mode not in ['crop', 'squash', 'fill', 'half_crop']:
 94 |         raise ValueError('resize_mode "%s" not supported' % resize_mode)
 95 | 
 96 |     if channels not in [None, 1, 3]:
 97 |         raise ValueError('unsupported number of channels: %s' % channels)
 98 | 
 99 |     if isinstance(image, PIL.Image.Image):
100 |         # Convert image mode (channels)
101 |         if channels is None:
102 |             image_mode = image.mode
103 |             if image_mode == 'L':
104 |                 channels = 1
105 |             elif image_mode == 'RGB':
106 |                 channels = 3
107 |             else:
108 |                 raise ValueError('unknown image mode "%s"' % image_mode)
109 |         elif channels == 1:
110 |             # 8-bit pixels, black and white
111 |             image_mode = 'L'
112 |         elif channels == 3:
113 |             # 3x8-bit pixels, true color
114 |             image_mode = 'RGB'
115 |         if image.mode != image_mode:
116 |             image = image.convert(image_mode)
117 |         image = np.array(image)
118 |     elif isinstance(image, np.ndarray):
119 |         if image.dtype != np.uint8:
120 |             image = image.astype(np.uint8)
121 |         if image.ndim == 3 and image.shape[2] == 1:
122 |             image = image.reshape(image.shape[:2])
123 |         if channels is None:
124 |             if image.ndim == 2:
125 |                 channels = 1
126 |             elif image.ndim == 3 and image.shape[2] == 3:
127 |                 channels = 3
128 |             else:
129 |                 raise ValueError('invalid image shape: %s' % (image.shape,))
130 |         elif channels == 1:
131 |             if image.ndim != 2:
132 |                 if image.ndim == 3 and image.shape[2] == 3:
133 |                     # color to grayscale
134 |                     image = np.dot(image, [0.299, 0.587, 0.114]).astype(np.uint8)
135 |                 else:
136 |                     raise ValueError('invalid image shape: %s' % (image.shape,))
137 |         elif channels == 3:
138 |             if image.ndim == 2:
139 |                 # grayscale to color
140 |                 image = np.repeat(image, 3).reshape(image.shape + (3,))
141 |             elif image.shape[2] != 3:
142 |                 raise ValueError('invalid image shape: %s' % (image.shape,))
143 |     else:
144 |         raise ValueError('resize_image() expected a PIL.Image.Image or a numpy.ndarray')
145 | 
146 |     # No need to resize
147 |     if image.shape[0] == height and image.shape[1] == width:
148 |         return image
149 | 
150 |     # Resize
151 |     interp = 'bilinear'
152 | 
153 |     width_ratio = float(image.shape[1]) / width
154 |     height_ratio = float(image.shape[0]) / height
155 |     if resize_mode == 'squash' or width_ratio == height_ratio:
156 |         return imresize(image, (height, width), interp=interp)
157 |     elif resize_mode == 'crop':
158 |         # resize to smallest of ratios (relatively larger image), keeping aspect ratio
159 |         if width_ratio > height_ratio:
160 |             resize_height = height
161 |             resize_width = int(round(image.shape[1] / height_ratio))
162 |         else:
163 |             resize_width = width
164 |             resize_height = int(round(image.shape[0] / width_ratio))
165 |         image = imresize(image, (resize_height, resize_width), interp=interp)
166 | 
167 |         # chop off ends of dimension that is still too long
168 |         if width_ratio > height_ratio:
169 |             start = int(round((resize_width - width) / 2.0))
170 |             return image[:, start:start + width]
171 |         else:
172 |             start = int(round((resize_height - height) / 2.0))
173 |             return image[start:start + height, :]
174 |     else:
175 |         if resize_mode == 'fill':
176 |             # resize to biggest of ratios (relatively smaller image), keeping aspect ratio
177 |             if width_ratio > height_ratio:
178 |                 resize_width = width
179 |                 resize_height = int(round(image.shape[0] / width_ratio))
180 |                 if (height - resize_height) % 2 == 1:
181 |                     resize_height += 1
182 |             else:
183 |                 resize_height = height
184 |                 resize_width = int(round(image.shape[1] / height_ratio))
185 |                 if (width - resize_width) % 2 == 1:
186 |                     resize_width += 1
187 |             image = imresize(image, (resize_height, resize_width), interp=interp)
188 |         elif resize_mode == 'half_crop':
189 |             # resize to average ratio keeping aspect ratio
190 |             new_ratio = (width_ratio + height_ratio) / 2.0
191 |             resize_width = int(round(image.shape[1] / new_ratio))
192 |             resize_height = int(round(image.shape[0] / new_ratio))
193 |             if width_ratio > height_ratio and (height - resize_height) % 2 == 1:
194 |                 resize_height += 1
195 |             elif width_ratio < height_ratio and (width - resize_width) % 2 == 1:
196 |                 resize_width += 1
197 |             image = imresize(image, (resize_height, resize_width), interp=interp)
198 |             # chop off ends of dimension that is still too long
199 |             if width_ratio > height_ratio:
200 |                 start = int(round((resize_width - width) / 2.0))
201 |                 image = image[:, start:start + width]
202 |             else:
203 |                 start = int(round((resize_height - height) / 2.0))
204 |                 image = image[start:start + height, :]
205 |         else:
206 |             raise Exception('unrecognized resize_mode "%s"' % resize_mode)
207 | 
208 |         # fill ends of dimension that is too short with random noise
209 |         if width_ratio > height_ratio:
210 |             padding = (height - resize_height) / 2
211 |             noise_size = (padding, width)
212 |             if channels > 1:
213 |                 noise_size += (channels,)
214 |             noise = np.random.randint(0, 255, noise_size).astype('uint8')
215 |             image = np.concatenate((noise, image, noise), axis=0)
216 |         else:
217 |             padding = (width - resize_width) / 2
218 |             noise_size = (height, padding)
219 |             if channels > 1:
220 |                 noise_size += (channels,)
221 |             noise = np.random.randint(0, 255, noise_size).astype('uint8')
222 |             image = np.concatenate((noise, image, noise), axis=1)
223 | 
224 |         return image
225 | 
226 | 
227 | def add_bboxes_to_image(image, bboxes, color='red', width=1):
228 |     """
229 |     Draw rectangles on the image for the bounding boxes
230 |     Returns a PIL.Image
231 |     Arguments:
232 |     image -- input image
233 |     bboxes -- bounding boxes in the [((l, t), (r, b)), ...] format
234 |     Keyword arguments:
235 |     color -- color to draw the rectangles
236 |     width -- line width of the rectangles
237 |     Example:
238 |     image = Image.open(filename)
239 |     add_bboxes_to_image(image, bboxes[filename], width=2, color='#FF7700')
240 |     image.show()
241 |     """
242 | 
243 |     def expanded_bbox(bbox, n):
244 |         """
245 |         Grow the bounding box by n pixels
246 |         """
247 |         l = min(bbox[0][0], bbox[1][0])
248 |         r = max(bbox[0][0], bbox[1][0])
249 |         t = min(bbox[0][1], bbox[1][1])
250 |         b = max(bbox[0][1], bbox[1][1])
251 |         return ((l - n, t - n), (r + n, b + n))
252 | 
253 |     from PIL import ImageDraw
254 |     draw = ImageDraw.Draw(image)
255 |     for bbox in bboxes:
256 |         for n in range(width):
257 |             draw.rectangle(expanded_bbox(bbox, n), outline=color)
258 | 
259 |     return image
260 | 
261 | 
262 | def get_layer_vis_square(data,
263 |                          allow_heatmap=True,
264 |                          normalize=True,
265 |                          min_img_dim=100,
266 |                          max_width=1200,
267 |                          channel_order='RGB',
268 |                          colormap='jet',
269 |                          ):
270 |     """
271 |     Returns a vis_square for the given layer data
272 |     Arguments:
273 |     data -- a np.ndarray
274 |     Keyword arguments:
275 |     allow_heatmap -- if True, convert single channel images to heatmaps
276 |     normalize -- whether to normalize the data when visualizing
277 |     max_width -- maximum width for the vis_square
278 |     """
279 |     if channel_order not in ['RGB', 'BGR']:
280 |         raise ValueError('Unsupported channel_order %s' % channel_order)
281 |     if data.ndim == 1:
282 |         # interpret as 1x1 grayscale images
283 |         # (N, 1, 1)
284 |         data = data[:, np.newaxis, np.newaxis]
285 |     elif data.ndim == 2:
286 |         # interpret as 1x1 grayscale images
287 |         # (N, 1, 1)
288 |         data = data.reshape((data.shape[0] * data.shape[1], 1, 1))
289 |     elif data.ndim == 3:
290 |         if data.shape[0] == 3:
291 |             # interpret as a color image
292 |             # (1, H, W, 3)
293 |             if channel_order == 'BGR':
294 |                 data = data[[2, 1, 0], ...]  # BGR to RGB (see issue #59)
295 |             data = data.transpose(1, 2, 0)
296 |             data = data[np.newaxis, ...]
297 |         else:
298 |             # interpret as grayscale images
299 |             # (N, H, W)
300 |             pass
301 |     elif data.ndim == 4:
302 |         if data.shape[0] == 3:
303 |             # interpret as HxW color images
304 |             # (N, H, W, 3)
305 |             data = data.transpose(1, 2, 3, 0)
306 |             if channel_order == 'BGR':
307 |                 data = data[:, :, :, [2, 1, 0]]  # BGR to RGB (see issue #59)
308 |         elif data.shape[1] == 3:
309 |             # interpret as HxW color images
310 |             # (N, H, W, 3)
311 |             data = data.transpose(0, 2, 3, 1)
312 |             if channel_order == 'BGR':
313 |                 data = data[:, :, :, [2, 1, 0]]  # BGR to RGB (see issue #59)
314 |         else:
315 |             # interpret as HxW grayscale images
316 |             # (N, H, W)
317 |             data = data.reshape((data.shape[0] * data.shape[1], data.shape[2], data.shape[3]))
318 |     else:
319 |         raise RuntimeError('unrecognized data shape: %s' % (data.shape,))
320 | 
321 |     return get_layer_vis_square_raw(data,
322 |                                     allow_heatmap,
323 |                                     normalize,
324 |                                     min_img_dim,
325 |                                     max_width,
326 |                                     colormap,
327 |                                     )
328 | 
329 | 
330 | def get_image_tales(images, colormap='jet', min_img_dim=100, max_width=1000):
331 |     padsize = 1
332 |     # convert to float since we're going to do some math
333 |     images = images.astype('float32')
334 | 
335 |     images -= images.min()
336 |     if images.max() > 0:
337 |         images /= images.max()
338 |         images *= 255
339 | 
340 |     if images.ndim == 3:
341 |         # they're grayscale - convert to a colormap
342 |         redmap, greenmap, bluemap = get_color_map(colormap)
343 | 
344 |         red = np.interp(images * (len(redmap) - 1) / 255.0, range(len(redmap)), redmap)
345 |         green = np.interp(images * (len(greenmap) - 1) / 255.0, range(len(greenmap)), greenmap)
346 |         blue = np.interp(images * (len(bluemap) - 1) / 255.0, range(len(bluemap)), bluemap)
347 | 
348 |         # Slap the channels back together
349 |         images = np.concatenate(
350 |             (red[..., np.newaxis], green[..., np.newaxis], blue[..., np.newaxis]), axis=3)
351 |         images = np.minimum(images, 255)
352 |         images = np.maximum(images, 0)
353 | 
354 |     # convert back to uint8
355 |     images = images.astype('uint8')
356 | 
357 |     # Compute the output image matrix dimensions
358 |     n = int(np.ceil(np.sqrt(images.shape[0])))
359 |     ny = n
360 |     nx = n
361 |     length = images.shape[0]
362 |     if n * (n - 1) >= length:
363 |         nx = n - 1
364 | 
365 |     # Add padding between the images
366 |     padding = ((0, nx * ny - length), (0, padsize), (0, padsize)) + ((0, 0),) * (images.ndim - 3)
367 |     padded = np.pad(images, padding, mode='constant', constant_values=0)
368 | 
369 |     # Tile the images beside each other
370 |     tiles = padded.reshape(
371 |         (ny, nx) + padded.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, padded.ndim + 1)))
372 |     tiles = tiles.reshape((ny * tiles.shape[1], nx * tiles.shape[3]) + tiles.shape[4:])
373 | 
374 |     return tiles
375 | 
376 | 
377 | def get_layer_vis_square_raw(data,
378 |                              allow_heatmap=True,
379 |                              normalize=True,
380 |                              min_img_dim=100,
381 |                              max_width=1200,
382 |                              colormap='jet',
383 |                              ):
384 |     # chop off data so that it will fit within max_width
385 |     padsize = 0
386 |     width = data.shape[2]
387 |     if width > max_width:
388 |         data = data[:1, :max_width, :max_width]
389 |     else:
390 |         if width > 1:
391 |             padsize = 1
392 |             width += 1
393 |         n = max(max_width // width, 1)
394 |         n *= n
395 |         data = data[:n]
396 | 
397 |     if not allow_heatmap and data.ndim == 3:
398 |         data = data[..., np.newaxis]
399 | 
400 |     vis = vis_square(data,
401 |                      padsize=padsize,
402 |                      normalize=normalize,
403 |                      colormap=colormap
404 |                      )
405 | 
406 |     # find minimum dimension and upscale if necessary
407 |     _min = sorted(vis.shape[:2])[0]
408 |     if _min < min_img_dim:
409 |         # upscale image
410 |         ratio = min_img_dim / float(_min)
411 |         vis = upscale(vis, ratio)
412 |     return vis
413 | 
414 | 
415 | def vis_square(images,
416 |                padsize=1,
417 |                normalize=False,
418 |                colormap='jet',
419 |                ):
420 |     """
421 |     Visualize each image in a grid of size approx sqrt(n) by sqrt(n)
422 |     Returns a np.array image
423 |     (Based on Caffe's filter_visualization notebook)
424 |     Arguments:
425 |     images -- an array of shape (N, H, W) or (N, H, W, C)
426 |             if C is not set, a heatmap is computed for the result
427 |     Keyword arguments:
428 |     padsize -- how many pixels go inbetween the tiles
429 |     normalize -- if true, scales (min, max) across all images out to (0, 1)
430 |     colormap -- a string representing one of the supported colormaps
431 |     """
432 |     assert 3 <= images.ndim <= 4, 'images.ndim must be 3 or 4'
433 |     # convert to float since we're going to do some math
434 |     images = images.astype('float32')
435 |     if normalize:
436 |         images -= images.min()
437 |         if images.max() > 0:
438 |             images /= images.max()
439 |             images *= 255
440 | 
441 |     if images.ndim == 3:
442 |         # they're grayscale - convert to a colormap
443 |         redmap, greenmap, bluemap = get_color_map(colormap)
444 | 
445 |         red = np.interp(images * (len(redmap) - 1) / 255.0, range(len(redmap)), redmap)
446 |         green = np.interp(images * (len(greenmap) - 1) / 255.0, range(len(greenmap)), greenmap)
447 |         blue = np.interp(images * (len(bluemap) - 1) / 255.0, range(len(bluemap)), bluemap)
448 | 
449 |         # Slap the channels back together
450 |         images = np.concatenate(
451 |             (red[..., np.newaxis], green[..., np.newaxis], blue[..., np.newaxis]), axis=3)
452 |         images = np.minimum(images, 255)
453 |         images = np.maximum(images, 0)
454 | 
455 |     # convert back to uint8
456 |     images = images.astype('uint8')
457 | 
458 |     # Compute the output image matrix dimensions
459 |     n = int(np.ceil(np.sqrt(images.shape[0])))
460 |     ny = n
461 |     nx = n
462 |     length = images.shape[0]
463 |     if n * (n - 1) >= length:
464 |         nx = n - 1
465 | 
466 |     # Add padding between the images
467 |     padding = ((0, nx * ny - length), (0, padsize), (0, padsize)) + ((0, 0),) * (images.ndim - 3)
468 |     padded = np.pad(images, padding, mode='constant', constant_values=255)
469 | 
470 |     # Tile the images beside each other
471 |     tiles = padded.reshape(
472 |         (ny, nx) + padded.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, padded.ndim + 1)))
473 |     tiles = tiles.reshape((ny * tiles.shape[1], nx * tiles.shape[3]) + tiles.shape[4:])
474 | 
475 |     if tiles.shape[-1] == 1:
476 |         # grayscale to color
477 |         tiles = np.dstack([tiles.squeeze()] * 3)
478 | 
479 |     return tiles
480 | 
481 | 
482 | def get_color_map(name):
483 |     """
484 |     Return a colormap as (redmap, greenmap, bluemap)
485 |     Arguments:
486 |     name -- the name of the colormap. If unrecognized, will default to 'jet'.
487 |     """
488 |     redmap = [0]
489 |     greenmap = [0]
490 |     bluemap = [0]
491 |     if name == 'white':
492 |         # essentially a noop
493 |         redmap = [0, 1]
494 |         greenmap = [0, 1]
495 |         bluemap = [0, 1]
496 |     elif name == 'simple':
497 |         redmap = [0, 1, 1, 1]
498 |         greenmap = [0, 0, 1, 1]
499 |         bluemap = [0, 0, 0, 1]
500 |     elif name == 'hot':
501 |         redmap = [0, 0.03968253968253968, 0.07936507936507936, 0.119047619047619, 0.1587301587301587,
502 |                   0.1984126984126984, 0.2380952380952381, 0.2777777777777778, 0.3174603174603174, 0.3571428571428571,
503 |                   0.3968253968253968, 0.4365079365079365, 0.4761904761904762, 0.5158730158730158, 0.5555555555555556,
504 |                   0.5952380952380952,
505 |                   0.6349206349206349, 0.6746031746031745, 0.7142857142857142, 0.753968253968254, 0.7936507936507936,
506 |                   0.8333333333333333, 0.873015873015873, 0.9126984126984127, 0.9523809523809523, 0.992063492063492, 1,
507 |                   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
508 |                   1, 1, 1]
509 |         greenmap = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03174603174603163,
510 |                     0.0714285714285714, 0.1111111111111112, 0.1507936507936507, 0.1904761904761905, 0.23015873015873,
511 |                     0.2698412698412698, 0.3095238095238093, 0.3492063492063491, 0.3888888888888888, 0.4285714285714284,
512 |                     0.4682539682539679, 0.5079365079365079, 0.5476190476190477, 0.5873015873015872, 0.6269841269841268,
513 |                     0.6666666666666665, 0.7063492063492065, 0.746031746031746, 0.7857142857142856, 0.8253968253968254,
514 |                     0.8650793650793651, 0.9047619047619047, 0.9444444444444442, 0.984126984126984, 1, 1, 1, 1, 1, 1, 1,
515 |                     1, 1, 1, 1, 1, 1]
516 |         bluemap = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
517 |                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.04761904761904745, 0.1269841269841265,
518 |                    0.2063492063492056, 0.2857142857142856, 0.3650793650793656, 0.4444444444444446, 0.5238095238095237,
519 |                    0.6031746031746028, 0.6825396825396828, 0.7619047619047619, 0.8412698412698409, 0.92063492063492, 1]
520 |     elif name == 'rainbow':
521 |         redmap = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9365079365079367,
522 |                   0.8571428571428572, 0.7777777777777777, 0.6984126984126986, 0.6190476190476191, 0.53968253968254,
523 |                   0.4603174603174605, 0.3809523809523814, 0.3015873015873018, 0.2222222222222223, 0.1428571428571432,
524 |                   0.06349206349206415, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03174603174603208, 0.08465608465608465,
525 |                   0.1375661375661377, 0.1904761904761907, 0.2433862433862437, 0.2962962962962963, 0.3492063492063493,
526 |                   0.4021164021164023, 0.4550264550264553, 0.5079365079365079, 0.5608465608465609, 0.6137566137566139,
527 |                   0.666666666666667]
528 |         greenmap = [0, 0.03968253968253968, 0.07936507936507936, 0.119047619047619, 0.1587301587301587,
529 |                     0.1984126984126984, 0.2380952380952381, 0.2777777777777778, 0.3174603174603174, 0.3571428571428571,
530 |                     0.3968253968253968, 0.4365079365079365, 0.4761904761904762, 0.5158730158730158, 0.5555555555555556,
531 |                     0.5952380952380952, 0.6349206349206349, 0.6746031746031745, 0.7142857142857142, 0.753968253968254,
532 |                     0.7936507936507936,
533 |                     0.8333333333333333, 0.873015873015873, 0.9126984126984127, 0.9523809523809523, 0.992063492063492, 1,
534 |                     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9841269841269842, 0.9047619047619047, 0.8253968253968256,
535 |                     0.7460317460317465, 0.666666666666667, 0.587301587301587, 0.5079365079365079, 0.4285714285714288,
536 |                     0.3492063492063493, 0.2698412698412698, 0.1904761904761907, 0.1111111111111116, 0.03174603174603208,
537 |                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
538 |         bluemap = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
539 |                    0, 0, 0, 0, 0.01587301587301582, 0.09523809523809534, 0.1746031746031744, 0.2539682539682535,
540 |                    0.333333333333333, 0.412698412698413, 0.4920634920634921, 0.5714285714285712, 0.6507936507936507,
541 |                    0.7301587301587302, 0.8095238095238093, 0.8888888888888884, 0.9682539682539679, 1, 1, 1, 1, 1, 1, 1,
542 |                    1, 1, 1, 1, 1, 1]
543 |     elif name == 'winter':
544 |         greenmap = [0, 1]
545 |         bluemap = [1, 0.5]
546 |     else:
547 |         if name != 'jet':
548 |             print('Warning: colormap "%s" not supported. Using jet instead.' % name)
549 |         redmap = [0, 0, 0, 0, 0.5, 1, 1, 1, 0.5]
550 |         greenmap = [0, 0, 0.5, 1, 1, 1, 0.5, 0, 0]
551 |         bluemap = [0.5, 1, 1, 1, 0.5, 0, 0, 0, 0]
552 |     return 255.0 * np.array(redmap), 255.0 * np.array(greenmap), 255.0 * np.array(bluemap)
553 | 


--------------------------------------------------------------------------------
/deepkit/experiment.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import atexit
  3 | import base64
  4 | import json
  5 | import os
  6 | import signal
  7 | import struct
  8 | import sys
  9 | import time
 10 | from datetime import datetime
 11 | from threading import Lock
 12 | from typing import Optional, Callable, NamedTuple, Dict, List
 13 | import math
 14 | 
 15 | import PIL.Image
 16 | import numpy as np
 17 | import psutil
 18 | import typedload
 19 | from rx import interval
 20 | 
 21 | import deepkit.client
 22 | import deepkit.debugger
 23 | import deepkit.globals
 24 | import deepkit.utils
 25 | from deepkit.model import ExperimentOptions
 26 | from deepkit.utils.image import pil_image_to_jpeg, get_layer_vis_square, get_image_tales, make_image_from_dense
 27 | from deepkit.utils import numpy_to_binary
 28 | 
 29 | 
 30 | def get_job_config():
 31 |     if deepkit.globals.loaded_job_config is None:
 32 |         if 'DEEPKIT_JOB_CONFIG' in os.environ:
 33 |             deepkit.globals.loaded_job_config = json.loads(os.environ['DEEPKIT_JOB_CONFIG'])
 34 |         else:
 35 |             deepkit.globals.loaded_job_config = {
 36 |             }
 37 | 
 38 |     return deepkit.globals.loaded_job_config
 39 | 
 40 | 
 41 | class JobController:
 42 |     def stop(self):
 43 |         """
 44 |         Raising the SIGINT signal in the current process and all sub-processes.
 45 |         os.kill() only issues a signal in the current process (without subprocesses).
 46 |         CTRL+C on the console sends the signal to the process group (which we need).
 47 |         """
 48 |         if hasattr(signal, 'CTRL_C_EVENT'):
 49 |             # windows. Need CTRL_C_EVENT to raise the signal in the whole process group
 50 |             os.kill(os.getpid(), signal.CTRL_C_EVENT)
 51 |         else:
 52 |             # unix.
 53 |             pgid = os.getpgid(os.getpid())
 54 |             if pgid == 1:
 55 |                 os.kill(os.getpid(), signal.SIGINT)
 56 |             else:
 57 |                 os.killpg(os.getpgid(os.getpid()), signal.SIGINT)
 58 | 
 59 | 
 60 | class JobDebuggingState(NamedTuple):
 61 |     watchingLayers: Dict[str, bool]
 62 |     live: bool
 63 |     recording: bool
 64 | 
 65 |     # 'epoch' | 'second'
 66 |     recordingMode: str
 67 | 
 68 |     # 'watched' | 'all'
 69 |     recordingLayers: str
 70 | 
 71 |     recordingSecond: int
 72 | 
 73 | 
 74 | class JobDebuggerController:
 75 |     def __init__(self, client: deepkit.client.Client):
 76 |         self.state: Optional[JobDebuggingState] = None
 77 |         self.client = client
 78 | 
 79 |     async def connected(self):
 80 |         await self._update_watching_layers()
 81 | 
 82 |     # registered RPC function
 83 |     async def updateWatchingLayer(self):
 84 |         await self._update_watching_layers()
 85 | 
 86 |     async def _update_watching_layers(self):
 87 |         self.state = typedload.load(await self.client.job_action('getDebuggingState'), JobDebuggingState)
 88 | 
 89 | 
 90 | class Experiment:
 91 |     def __init__(self, project=None, account=None, monitoring=True, try_pick_up=False, parent_experiment=None,
 92 |                  silent=False):
 93 |         """
 94 |         :type project: str If the current folder is not linked and you don't specify a project here, an error is raised since
 95 |         Deepkit isn't able to know to which project the experiments data should be sent.
 96 | 
 97 |         :type account: str Per default the first account linked to this folder is used (see `deepkit link` or `deepkit-sdk auth -l`),
 98 |         this is on a new system `localhost`.
 99 |         You can overwrite which account is used by specifying the name here (see `deepkit id` for
100 |         available accounts in your system).
101 | 
102 |         :type monitoring: bool When true this experiment sends current stdout as experiment logs and monitors
103 |         hardware utilisation of the current process.
104 | 
105 |         :type try_pick_up: bool Whether it should be tried to pick up an existing experiment created by the
106 |         CLI/App (determined by environment variables). For manually created experiments this should be False.
107 | 
108 |         :tye parent_experiment: str When defined moves this experiment as sub experiment
109 |         """
110 |         self.account = account
111 |         self.project = project
112 |         self.monitoring = monitoring
113 |         self.parent_experiment = parent_experiment
114 |         self.silent = silent
115 | 
116 |         if not self.parent_experiment:
117 |             if deepkit.globals.last_experiment:
118 |                 self.parent_experiment = deepkit.globals.last_experiment.id
119 |             else:
120 |                 self.parent_experiment = os.environ.get('DEEPKIT_JOB_ID', None)
121 | 
122 |         self.metric_buffer = []
123 |         self.speed_buffer = []
124 |         self.logs_buffer = []
125 |         self.last_throttle_call = dict()
126 | 
127 |         self.client = deepkit.client.Client(project=project, account=account, try_pick_up=try_pick_up,
128 |                                             parent_experiment=self.parent_experiment, silent=self.silent)
129 | 
130 |         self.log_lock = Lock()
131 |         self.defined_metrics = {}
132 |         self.shutting_down = False
133 | 
134 |         self.last_iteration_time = 0
135 |         self.last_batch_time = 0
136 |         self.job_iteration = 0
137 |         self.job_iterations = 0
138 |         self.job_step = 0
139 |         self.job_steps = 0
140 | 
141 |         self.model_watching = dict()
142 | 
143 |         self.auto_x_of_metrix = dict()
144 |         self.auto_x_of_insight = dict()
145 |         self.created_insights = dict()
146 | 
147 |         self.seconds_per_iteration = 0
148 |         self.seconds_per_iterations = []
149 |         self.debugger = deepkit.debugger.DebuggerManager(self)
150 | 
151 |         if deepkit.utils.in_self_execution():
152 |             self.job_controller = JobController()
153 | 
154 |         self.debugger_controller: JobDebuggerController = JobDebuggerController(self.client)
155 | 
156 |         # runs in the client Thread
157 |         def on_connect(connected):
158 |             if connected:
159 |                 if deepkit.utils.in_self_execution():
160 |                     self.client.register_controller('job/' + self.client.job_id, self.job_controller)
161 | 
162 |                 self.client.register_controller('job/' + self.client.job_id + '/debugger', self.debugger_controller)
163 | 
164 |                 asyncio.run_coroutine_threadsafe(self.debugger_controller.connected(), loop=self.client.loop)
165 |             else:
166 |                 self.debugger.on_disconnect()
167 | 
168 |         self.client.connected.subscribe(on_connect)
169 | 
170 |         atexit.register(self.shutdown)
171 |         try:
172 |             self.client.connect()
173 |             self.wait_for_connect()
174 |         except Exception as e:
175 |             print("Error connecting to Deepkit. Experiment data sync aborted.", e)
176 |             if deepkit.globals.last_experiment is self:
177 |                 deepkit.globals.last_experiment = None
178 | 
179 |         if deepkit.utils.in_self_execution() and monitoring:
180 |             # the CLI handles output logging otherwise
181 |             if len(deepkit.globals.last_logs.getvalue()) > 0:
182 |                 self.logs_buffer.append(deepkit.globals.last_logs.getvalue())
183 | 
184 |         if deepkit.utils.in_self_execution() and monitoring:
185 |             # the CLI handles hardware monitoring otherwise
186 |             p = psutil.Process()
187 | 
188 |             def on_hardware_metrics(dummy):
189 |                 net = psutil.net_io_counters()
190 |                 disk = psutil.disk_io_counters()
191 |                 data = struct.pack(
192 |                     '<BHdHHffff',
193 |                     1,
194 |                     0,
195 |                     time.time(),
196 |                     # stretch to max precision of uint16
197 |                     min(65535, int(((p.cpu_percent(interval=None) / 100) / psutil.cpu_count(False)) * 65535)),
198 |                     # stretch to max precision of uint16
199 |                     min(65535, int((p.memory_percent() / 100) * 65535)),
200 |                     float(net.bytes_recv),
201 |                     float(net.bytes_sent),
202 |                     float(disk.write_bytes),
203 |                     float(disk.read_bytes),
204 |                 )
205 | 
206 |                 self.client.job_action_threadsafe('streamInternalFile',
207 |                                                   ['.deepkit/hardware/main_0.hardware',
208 |                                                    base64.b64encode(data).decode('utf8')])
209 | 
210 |             self.hardware_subscription = interval(1).subscribe(on_hardware_metrics)
211 | 
212 |     @property
213 |     def full_id(self):
214 |         return self.client.job_id
215 | 
216 |     @property
217 |     def id(self):
218 |         return self.client.job_id
219 | 
220 |     def throttle_call(self, fn: Callable, delay: int = 1000):
221 |         last_time = self.last_throttle_call.get(fn)
222 |         if not last_time or (time.time() - (delay / 1000)) > last_time:
223 |             self.last_throttle_call[fn] = time.time()
224 |             fn()
225 | 
226 |     def create_sub_experiment(self):
227 |         return Experiment(
228 |             project=self.project,
229 |             account=self.account,
230 |             parent_experiment=self.id,
231 |             silent=self.silent
232 |         )
233 | 
234 |     def drain_speed_report(self):
235 |         # only save latest value, each second
236 |         if len(self.speed_buffer) == 0: return
237 |         item = self.speed_buffer[-1]
238 |         self.speed_buffer = []
239 |         self.client.job_action_threadsafe(
240 |             'streamInternalFile',
241 |             ['.deepkit/speed.metric', base64.b64encode(item).decode('utf8')]
242 |         )
243 | 
244 |     def drain_logs(self):
245 |         if len(self.logs_buffer) == 0: return
246 |         packed = ''
247 |         buffer = self.logs_buffer.copy()
248 |         self.logs_buffer = []
249 |         for d in buffer:
250 |             packed += d
251 | 
252 |         self.client.job_action_threadsafe('log', ['main_0', packed])
253 | 
254 |     def drain_metric_buffer(self):
255 |         if len(self.metric_buffer) == 0:
256 |             return
257 |         buffer = self.metric_buffer.copy()
258 |         self.metric_buffer = []
259 |         try:
260 |             packed = {}
261 |             items = {}
262 |             for d in buffer:
263 |                 if d['id'] not in packed:
264 |                     packed[d['id']] = b''
265 |                     items[d['id']] = 0
266 | 
267 |                 items[d['id']] += 1
268 |                 packed[d['id']] += d['row']
269 | 
270 |             for i, v in packed.items():
271 |                 # print('channelData', items[i], len(v) / 27)
272 | 
273 |                 self.client.job_action_threadsafe('channelData', [i, base64.b64encode(v).decode('utf8')])
274 |         except Exception as e:
275 |             print('on_metric failed', e)
276 | 
277 |     def wait_for_connect(self):
278 |         async def wait():
279 |             await self.client.connecting
280 | 
281 |         asyncio.run_coroutine_threadsafe(wait(), self.client.loop).result()
282 | 
283 |     def done(self):
284 |         self.client.result_status = deepkit.client.JobStatus.done
285 |         self.shutdown()
286 | 
287 |     def abort(self):
288 |         self.client.result_status = deepkit.client.JobStatus.aborted
289 |         self.shutdown()
290 | 
291 |     def crash(self):
292 |         self.client.result_status = deepkit.client.JobStatus.crashed
293 |         self.shutdown()
294 | 
295 |     def failed(self):
296 |         self.client.result_status = deepkit.client.JobStatus.failed
297 |         self.shutdown()
298 | 
299 |     def shutdown(self):
300 |         if self.shutting_down: return
301 |         self.shutting_down = True
302 |         atexit.unregister(self.shutdown)
303 |         self.drain_metric_buffer()
304 |         self.drain_speed_report()
305 |         self.drain_logs()
306 |         self.client.shutdown()
307 | 
308 |     def epoch(self, current: int, total: Optional[int]):
309 |         self.iteration(current, total)
310 |         self.debugger.tick()
311 | 
312 |     def iteration(self, current: int, total: Optional[int]):
313 |         if current and self.job_iteration == current:
314 |             # nothing to do
315 |             return
316 | 
317 |         self.job_iteration = current
318 |         if total:
319 |             self.job_iterations = total
320 | 
321 |         now = time.time()
322 |         if self.last_iteration_time:
323 |             self.seconds_per_iterations.append({
324 |                 'diff': now - self.last_iteration_time,
325 |                 'when': now,
326 |             })
327 | 
328 |         self.last_iteration_time = now
329 |         self.last_batch_time = now
330 | 
331 |         # remove all older than twenty seconds
332 |         self.seconds_per_iterations = [x for x in self.seconds_per_iterations if (now - x['when']) < 20]
333 |         self.seconds_per_iterations = self.seconds_per_iterations[-30:]
334 | 
335 |         if len(self.seconds_per_iterations) > 0:
336 |             diffs = [x['diff'] for x in self.seconds_per_iterations]
337 |             self.seconds_per_iteration = sum(diffs) / len(diffs)
338 | 
339 |         if self.seconds_per_iteration:
340 |             self.client.patch('secondsPerIteration', self.seconds_per_iteration)
341 | 
342 |         self.client.patch('iteration', self.job_iteration)
343 |         if total:
344 |             self.client.patch('iterations', self.job_iterations)
345 | 
346 |         iterations_left = self.job_iterations - self.job_iteration
347 |         if iterations_left > 0:
348 |             self.client.patch('eta', self.seconds_per_iteration * iterations_left)
349 |         else:
350 |             self.client.patch('eta', 0)
351 | 
352 |     def batch(self, current: int, total: int = None, size: int = 1):
353 |         self.step(current, total, size)
354 | 
355 |     def step(self, current: int, total: int = None, size: int = 1):
356 |         if current and self.job_steps == current:
357 |             # nothing to do
358 |             return
359 | 
360 |         if current < self.job_step:
361 |             # it was reset, new epoch/iteration basically
362 |             self.job_step = 0
363 | 
364 |         steps_made = current - self.job_step
365 | 
366 |         self.job_step = current
367 |         if total is not None:
368 |             self.job_steps = total
369 |         if total is None:
370 |             total = self.job_steps
371 | 
372 |         self.client.patch('step', current)
373 |         now = time.time()
374 | 
375 |         x = self.job_iteration + (current / total)
376 |         speed_per_second = 0
377 |         if size:
378 |             speed_per_second = size / (now - self.last_batch_time) if self.last_batch_time else size
379 | 
380 |         if self.last_batch_time:
381 |             time_per_step = step_since_last_took = (now - self.last_batch_time)
382 |             if steps_made > 0:
383 |                 time_per_step = step_since_last_took / steps_made
384 | 
385 |             self.seconds_per_iterations.append({
386 |                 'diff': time_per_step * total,
387 |                 'when': now
388 |             })
389 | 
390 |         # remove all older than twenty seconds
391 |         self.seconds_per_iterations = [x for x in self.seconds_per_iterations if (now - x['when']) < 20]
392 |         self.seconds_per_iterations = self.seconds_per_iterations[-30:]
393 | 
394 |         if len(self.seconds_per_iterations) > 0:
395 |             diffs = [x['diff'] for x in self.seconds_per_iterations]
396 |             self.seconds_per_iteration = sum(diffs) / len(diffs)
397 | 
398 |             iterations_left = self.job_iterations - self.job_iteration
399 |             self.client.patch('eta', self.seconds_per_iteration * iterations_left)
400 | 
401 |         self.last_batch_time = now
402 | 
403 |         if self.seconds_per_iteration:
404 |             self.client.patch('secondsPerIteration', self.seconds_per_iteration)
405 | 
406 |         self.client.patch('speed', speed_per_second)
407 | 
408 |         speed = struct.pack('<Bddd', 1, float(x), now, float(speed_per_second))
409 |         self.speed_buffer.append(speed)
410 |         self.drain_logs()
411 | 
412 |         if total:
413 |             self.client.patch('steps', total)
414 | 
415 |         self.debugger.tick()
416 | 
417 |     def set_title(self, s: str):
418 |         self.client.patch('title', s)
419 | 
420 |     def set_info(self, name: str, value: any):
421 |         self.client.patch('infos.' + str(name).replace('.', '/'), value)
422 | 
423 |     def set_description(self, description: any):
424 |         self.client.patch('description', description)
425 | 
426 |     def add_label(self, *label_name: str):
427 |         for name in label_name:
428 |             self.client.job_action_threadsafe('addLabel', [name])
429 | 
430 |     def remove_label(self, label_name: str):
431 |         self.client.job_action_threadsafe('removeLabel', [label_name])
432 | 
433 |     def set_config(self, name: str, value: any):
434 |         self.client.patch('config.config.' + str(name).replace('.', '/'), value)
435 | 
436 |     def set_full_config(self, config: any):
437 |         self.client.patch('config.config', config)
438 | 
439 |     def define_metric(self, name: str, traces: List[str] = None):
440 |         name = name.replace('.', '/')
441 |         if not traces:
442 |             traces = ['0']
443 |         self.defined_metrics[name] = {'traces': traces}
444 |         self.client.job_action_threadsafe('defineMetric', [name, self.defined_metrics[name]])
445 | 
446 |         that = self
447 | 
448 |         class Controller:
449 |             def send(self, *y, x=None):
450 |                 that.log_metric(name, *y, x=x)
451 | 
452 |         return Controller()
453 | 
454 |     def add_output_file(self, path: str):
455 |         self.add_file(path, as_output=True)
456 | 
457 |     def add_output_content(self, path: str, content):
458 |         self.add_file_content(path, content, as_output=True)
459 | 
460 |     def add_file(self, path: str, as_output=False):
461 |         relative_path = os.path.relpath(path, os.getcwd())
462 |         if 'DEEPKIT_ROOT_DIR' in os.environ:
463 |             relative_path = os.path.relpath(path, os.environ['DEEPKIT_ROOT_DIR'])
464 | 
465 |         if '..' in relative_path:
466 |             relative_path = '__parent/' + relative_path.replace('..', '__')
467 | 
468 |         self.add_file_content(relative_path, open(path, 'rb').read(), as_output=as_output)
469 | 
470 |     def add_file_content(self, path: str, content, as_output=False):
471 |         if isinstance(content, (dict, list, tuple)):
472 |             content = json.dumps(content)
473 | 
474 |         if isinstance(content, str):
475 |             content = bytes(content, encoding='utf-8')
476 | 
477 |         if not isinstance(content, bytes):
478 |             raise Exception('Data type is not supported. Please provide str, bytes, or dict/list/tuple.')
479 | 
480 |         method = 'uploadOutputFile' if as_output else 'uploadFile'
481 |         self.client.job_action_threadsafe(method, [path, base64.b64encode(content).decode('utf8')])
482 | 
483 |     def set_list(self, name: str):
484 |         self.client.job_action_threadsafe('setList', [name])
485 | 
486 |     def full_config(self):
487 |         return get_job_config()
488 | 
489 |     def get_config(self, path, default=None):
490 |         res = deepkit.utils.get_parameter_by_path(get_job_config(), path)
491 |         if res is None:
492 |             self.set_config(path, default)
493 |             return default
494 | 
495 |         return res
496 | 
497 |     def intconfig(self, path, default=None):
498 |         v = self.get_config(path, default)
499 |         return int(v) if v is not None else default
500 | 
501 |     def floatconfig(self, path, default=None):
502 |         v = self.get_config(path, default)
503 |         return float(v) if v is not None else default
504 | 
505 |     def boolconfig(self, path, default=None):
506 |         v = self.get_config(path, default)
507 |         if v is None:
508 |             return default
509 |         if not v or v is 'false' or v is 0 or v is '0':
510 |             return False
511 |         return True
512 | 
513 |     def config(self, path, default=None):
514 |         v = self.get_config(path, default)
515 |         return v if v is not None else default
516 | 
517 |     def watch_keras_model(self, model, model_input=None, name=None, is_batch=True):
518 |         if model in self.model_watching: return
519 |         self.model_watching[model] = True
520 | 
521 |         from deepkit.keras_tf import TFDebugger, extract_model_graph
522 |         name = name if name else model.name
523 | 
524 |         graph, record_map, input_names = extract_model_graph(model)
525 |         debugger = TFDebugger(self.debugger, model, model_input, name, record_map, is_batch, input_names)
526 | 
527 |         if not model_input:
528 |             # we monkey patch entry methods to keras so we automatically fetch the model_input
529 |             ori_fit_generator = model.fit_generator
530 |             ori_fit = model.fit
531 |             ori_train_on_batch = model.train_on_batch
532 |             ori_predict = model.predict
533 | 
534 |             def fit_generator(generator, *args, **kwargs):
535 |                 if debugger.model_input is None:
536 |                     debugger.set_input(next(iter(generator)))
537 |                 return ori_fit_generator(generator, *args, **kwargs)
538 | 
539 |             model.fit_generator = fit_generator
540 | 
541 |             def fit(x=None, *args, **kwargs):
542 |                 if debugger.model_input is None:
543 |                     debugger.set_input(x)
544 |                 return ori_fit(x, *args, **kwargs)
545 | 
546 |             model.fit = fit
547 | 
548 |             def train_on_batch(x=None, *args, **kwargs):
549 |                 if debugger.model_input is None:
550 |                     debugger.set_input(x)
551 |                 return ori_train_on_batch(x, *args, **kwargs)
552 | 
553 |             model.train_on_batch = train_on_batch
554 | 
555 |             def predict(x=None, *args, **kwargs):
556 |                 if debugger.model_input is None:
557 |                     debugger.set_input(x)
558 |                 return ori_predict(x, *args, **kwargs)
559 | 
560 |             model.predict = predict
561 | 
562 |         self.debugger.register_debugger(debugger)
563 |         self.client.job_action_threadsafe('setModelGraph', [graph, name])
564 |         return debugger
565 | 
566 |     def watch_torch_model(self, model, name='main'):
567 |         if model in self.model_watching: return
568 |         self.model_watching[model] = True
569 |         from deepkit.pytorch import TorchDebugger
570 | 
571 |         def resolve_map(inputs):
572 |             graph, record_map, input_names, output_names = self._set_torch_model(model, name=name, inputs=inputs)
573 |             return record_map, input_names, output_names
574 | 
575 |         debugger = TorchDebugger(self.debugger, model, name, resolve_map)
576 |         self.debugger.register_debugger(debugger)
577 |         return debugger
578 | 
579 |     def _set_torch_model(self, model, input_shape=None, input_sample=None, inputs=None, name='main'):
580 |         """
581 |         Extracts the computation graph using either the given input_shape with random data
582 |         or the given (real) input_sample. If you have multiple models per training, use the name
583 |         argument to differentiate.
584 |         :param model: your pytorch model instance
585 |         :param input_shape: shape like (1, 32, 32) or a list of input shapes for multi input ((1, 3, 64, 64), (10,)).
586 |                             Don't forget to specify the batch dimension.
587 |         :param input_sample: a simple (not in a batch)
588 |         :param inputs: full inputs list with real examples as if `model(*inputs)` is  called
589 |         :param name: optional name if you have multiple models
590 |         :return:
591 |         """
592 |         from torch import from_numpy
593 |         from deepkit.pytorch import get_pytorch_graph
594 | 
595 |         if not inputs and not input_shape and input_sample is None:
596 |             raise Exception('No inputs, input_shape and no input_sample given. Specify either of those.')
597 |         xs = inputs
598 | 
599 |         if xs is None:
600 |             if input_sample is not None:
601 |                 if isinstance(input_sample, (tuple, list)):
602 |                     xs = input_sample
603 |                 else:
604 |                     # we got single input sample
605 |                     xs = [input_sample]
606 | 
607 |             if xs is None:
608 |                 # we need a batch size of 1
609 |                 if isinstance(input_shape[0], (tuple, list)):
610 |                     # we got multi inputs
611 |                     xs = []
612 |                     for i in range(0, len(input_shape)):
613 |                         # convert to float32 per default
614 |                         x = (from_numpy(np.random.random_sample(input_shape[i]).astype(np.single)),)
615 |                         xs.append(x)
616 |                 else:
617 |                     # convert to float32 per default
618 |                     xs = (from_numpy(np.random.random_sample(input_shape).astype(np.single)),)
619 | 
620 |         graph, record_map, input_names, output_names = get_pytorch_graph(model, xs)
621 | 
622 |         self.client.job_action_threadsafe('setModelGraph', [graph, name])
623 |         return graph, record_map, input_names, output_names
624 | 
625 |     def log_insight(self, *data, name: str, x=None, image_convertion=True, meta=None):
626 |         if x is None:
627 |             if self.job_steps > 0:
628 |                 x = self.job_iteration + (self.job_step / self.job_steps)
629 |             elif self.job_iteration > 0:
630 |                 x = self.job_iteration
631 |             else:
632 |                 if name not in self.auto_x_of_insight:
633 |                     self.auto_x_of_insight[name] = 0
634 |                 self.auto_x_of_insight[name] += 1
635 |                 x = self.auto_x_of_insight[name]
636 | 
637 |         if not isinstance(x, (int, float)):
638 |             raise Exception('x needs to be integer or float')
639 | 
640 |         if x not in self.created_insights:
641 |             self.created_insights[x] = True
642 |             self.client.job_action_threadsafe('addInsight', [
643 |                 x,
644 |                 time.time(),
645 |                 self.job_iteration,
646 |                 self.job_step,
647 |             ])
648 | 
649 |         for i, d in enumerate(data):
650 |             file_type = ''
651 |             if isinstance(d, PIL.Image.Image):
652 |                 file_type = 'png'
653 |                 d = pil_image_to_jpeg(d)
654 |             elif isinstance(d, np.ndarray):
655 |                 # tf is not batch per default
656 | 
657 |                 if image_convertion:
658 |                     sample = np.copy(d)
659 |                     shape = d.shape
660 |                     image = False
661 |                     if len(shape) == 3:
662 |                         try:
663 |                             if 'keras' in sys.modules:
664 |                                 import keras
665 |                                 if keras.backend.image_data_format() == 'channels_last':
666 |                                     sample = np.transpose(sample, (2, 0, 1))
667 |                             elif 'tensorflow.keras' in sys.modules:
668 |                                 import tensorflow.keras as keras
669 |                                 if keras.backend.image_data_format() == 'channels_last':
670 |                                     sample = np.transpose(sample, (2, 0, 1))
671 |                         except:
672 |                             pass
673 | 
674 |                         if sample.shape[0] == 3:
675 |                             d = PIL.Image.fromarray(get_layer_vis_square(sample))
676 |                             image = True
677 |                         else:
678 |                             d = PIL.Image.fromarray(get_image_tales(sample))
679 |                             image = True
680 |                     elif len(shape) > 1:
681 |                         d = PIL.Image.fromarray(get_layer_vis_square(sample))
682 |                         image = True
683 |                     elif len(shape) == 1:
684 |                         if shape[0] != 1:
685 |                             # we got a single number
686 |                             d = sample[0]
687 |                         else:
688 |                             d = make_image_from_dense(sample)
689 |                             image = True
690 |                     if image:
691 |                         file_type = 'png'
692 |                         d = pil_image_to_jpeg(d)
693 |                     else:
694 |                         file_type = 'npy'
695 |                         d = numpy_to_binary(d)
696 |                 else:
697 |                     file_type = 'npy'
698 |                     d = numpy_to_binary(d)
699 |             else:
700 |                 file_type = 'json'
701 |                 d = bytes(json.dumps(d), encoding='utf-8')
702 | 
703 |             if len(data) > 1:
704 |                 file_name = name + '_' + str(i) + '.' + file_type
705 |             else:
706 |                 file_name = name + '.' + file_type
707 | 
708 |             self.client.job_action_threadsafe('addInsightEntry', [
709 |                 x,
710 |                 file_name,
711 |                 datetime.utcnow().isoformat(),
712 |                 {
713 |                     'type': file_type,
714 |                     'meta': meta
715 |                 },
716 |                 base64.b64encode(d).decode(),
717 |             ])
718 | 
719 |     def log_metric(self, name: str, *y, x=None):
720 |         if y is None:
721 |             y = 0
722 | 
723 |         if not isinstance(y, (list, tuple)):
724 |             y = [y]
725 | 
726 |         def convert(v):
727 |             if v is None: return 0
728 |             if math.isnan(v): return 0
729 |             return float(v)
730 | 
731 |         y = [convert(v) for v in y]
732 | 
733 |         if x is None:
734 |             if self.job_steps > 0:
735 |                 x = self.job_iteration + (self.job_step / self.job_steps)
736 |             else:
737 |                 if name not in self.auto_x_of_metrix:
738 |                     self.auto_x_of_metrix[name] = 0
739 |                 self.auto_x_of_metrix[name] += 1
740 |                 x = self.auto_x_of_metrix[name]
741 | 
742 |         name = name.replace('.', '/')
743 | 
744 |         if name not in self.defined_metrics:
745 |             traces = [str(i) for i, _ in enumerate(y)]
746 |             self.define_metric(name, traces=traces)
747 |         else:
748 |             if 'traces' in self.defined_metrics[name] and len(self.defined_metrics[name]['traces']) != len(y):
749 |                 traces = self.defined_metrics[name]['traces']
750 |                 raise Exception(f'Metric {name} has {len(traces)} traces defined, but you provided {len(y)}')
751 | 
752 |         row_binary = struct.pack('<BHdd', 1, len(y), float(x), time.time())
753 |         for y1 in y:
754 |             row_binary += struct.pack('<d', float(y1) if y1 is not None else 0.0)
755 | 
756 |         self.client.patch('channelLastValues.' + name, y)
757 |         self.metric_buffer.append({'id': name, 'row': row_binary})
758 |         self.drain_metric_buffer()
759 | 
760 |     def create_keras_callback(self, model=None, debug_model_input=None):
761 |         from .deepkit_keras import KerasCallback
762 |         callback = KerasCallback(debug_model_input)
763 |         if model:
764 |             self.watch_keras_model(model)
765 | 
766 |         return callback
767 | 
768 |     def log(self, s: str):
769 |         self.logs_buffer.append(s)
770 |         self.drain_logs()
771 | 


--------------------------------------------------------------------------------