├── platoon ├── tests │ ├── __init__.py │ ├── unit │ │ ├── __init__.py │ │ ├── test_configparser.py │ │ ├── test_util.py │ │ └── test_controller.py │ └── functional │ │ ├── README.md │ │ ├── time_worker.py │ │ ├── test_worker.py │ │ ├── test_global_dynamics_worker.py │ │ └── test_ops_worker.py ├── training │ ├── __init__.py │ └── global_dynamics.py ├── __init__.py ├── channel │ └── __init__.py ├── configparser.py ├── param_sync.py ├── util.py └── ops.py ├── example ├── data │ ├── .gitignore │ └── readme.txt ├── simple_batched_pixel_sum │ ├── README.txt │ ├── batched_pixel_sum.py │ ├── batched_pixel_sum_worker.py │ └── batched_pixel_sum_controller.py ├── lstm │ ├── README.txt │ ├── lstm_controller.py │ ├── imdb.py │ └── lstm_worker.py └── synchronous_lstm │ ├── README.txt │ ├── imdb.py │ ├── lstm_controller.py │ └── lstm_worker.py ├── setup.py ├── .gitignore ├── LICENSE ├── platoonrc.conf ├── README.md ├── scripts └── platoon-launcher └── doc └── platoon └── control_request.svg /platoon/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /platoon/training/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /platoon/tests/unit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !readme.txt 4 | -------------------------------------------------------------------------------- /example/data/readme.txt: -------------------------------------------------------------------------------- 1 | This file is here so the directory exists. 2 | -------------------------------------------------------------------------------- /platoon/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .channel.controller import Controller 3 | from .channel.worker import Worker 4 | -------------------------------------------------------------------------------- /platoon/channel/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | :mod:`channel` -- Platoon's communication backend 3 | ================================================= 4 | 5 | .. module:: channel 6 | :platform: Unix 7 | :synopsis: Contains controller and worker modules which compose Platoon's 8 | communication architecture. 9 | 10 | This file serves as a backwards compatibility layer for Platoon v0.5.0. 
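For example, both of the following imports resolve to the same classes; the first is the path used throughout the bundled examples, the second is the newer top-level path:

    from platoon.channel import Controller, Worker
    from platoon import Controller, Worker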
11 | 12 | """ 13 | from __future__ import absolute_import 14 | from .worker import Worker 15 | from .controller import Controller 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from setuptools import setup 3 | 4 | setup( 5 | name='platoon', 6 | version='0.6.1', 7 | author='MILA', 8 | packages=['platoon', 'platoon.channel', 'platoon.training'], 9 | scripts=['scripts/platoon-launcher'], 10 | url='https://github.com/mila-udem/platoon/', 11 | license='MIT', 12 | description='Experimental multi-GPU mini-framework for Theano', 13 | long_description=open('README.md').read(), 14 | install_requires=['numpy', 'cffi', 'pyzmq', 'posix_ipc', 'six'] 15 | ) 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # Platoon log folders 60 | PLATOON_LOGS/ 61 | *.prof 62 | *.out 63 | *.err 64 | *.conf 65 | .platoonrc 66 | conf_from_topo.py 67 | test.sh 68 | test_scripts/ 69 | !./platoonrc.conf 70 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 mila-udem 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /example/simple_batched_pixel_sum/README.txt: -------------------------------------------------------------------------------- 1 | ## GOAL ## 2 | The goal of this example is to showcase Platoon's functionality in the simplest way possible. 3 | 4 | 5 | ## CONTENT ## 6 | - README.txt : This file! 7 | - batched_pixel_sum.py : A simple Theano pixel-wise sum on MNIST 8 | - batched_pixel_sum_controller.py : A platoon implementation of batched_pixel_sum.py 9 | batched_pixel_sum_worker.py 10 | 11 | 12 | ## HOW TO USE ## 13 | # USING THE LAUNCHER 14 | 1) Assuming you are in the simple_batched_pixel_sum folder. 15 | `cd platoon/example/simple_batched_pixel_sum/` 16 | 17 | 2) Launch the experiment on 1 gpu using the platoon-launcher script. 18 | All the outputs will be saved in a newly created `PLATOON_LOGS` folder. 19 | `platoon-launcher batched_pixel_sum gpu0` 20 | 21 | # MANUALLY 22 | 1) Assuming you are in the simple_batched_pixel_sum folder. 23 | `cd platoon/example/simple_batched_pixel_sum/` 24 | 25 | 2) Start the controller. 26 | `THEANO_FLAGS='device=cpu' python -u batched_pixel_sum_controller.py` 27 | 28 | 3) Start the worker. 29 | `THEANO_FLAGS='device=gpu0' python -u batched_pixel_sum_worker.py` 30 | 31 | 32 | ## NOTE ## 33 | - Using more than 1 worker causes problem at the moment for THIS particular example. 34 | The reason is that we are using the "dataset handled by the controller" feature which is not quite ready yet. 35 | -------------------------------------------------------------------------------- /platoon/tests/functional/README.md: -------------------------------------------------------------------------------- 1 | To functional test the *all_reduce* worker interface, you need to: 2 | 3 | 1. Export the environmental variable `PLATOON_TEST_WORKERS_NUM` to be equal to 4 | the total number of workers (GPUs) to be spawned across hosts in the 5 | functional test. 6 | 2. Call `platoon-launcher test` to start the test while being in the same 7 | directory as `test_worker.py` file. You can configure the multi-GPU/node 8 | procedure in any possible way as long as the total number of workers, which 9 | was set in the previous step, is respected. 10 | 11 | The procedure exits with 0 for success. If this does not hold, please check 12 | `platoon-launcher`, in order to see a high-level description of the return 13 | code, and `PLATOON_LOGS` of the late procedure in current directory. 14 | 15 | To profile and benchmark the new worker interface, you need to run 16 | `platoon-launcher time` in current directory. Results are written in 17 | `PLATOON_LOGS`. 18 | 19 | To test and profile the Theano Ops of worker interface, you need to run 20 | `platoon-launcher test_ops` in current directory. 21 | 22 | To test implementations of global dynamics, please run 23 | `platoon-launcher test_global_dynamics` in current directory. 24 | 25 | **Note**: Depending on your hardware configuration, launching on defaults 26 | Platoon may not suffice for a successful execution. Please check the 27 | documentation and *platoonrc.conf* on how to configure Platoon. 28 | -------------------------------------------------------------------------------- /platoon/tests/unit/test_configparser.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import os 3 | import unittest 4 | from six.moves import reload_module as reload 5 | 6 | from ... 
import configparser as cfgp 7 | 8 | 9 | def test_fetch_hosts_from_envs(): 10 | if os.getenv("PLATOON_HOSTS"): 11 | os.environ.pop("PLATOON_HOSTS") 12 | true_hosts = ["test0", "tes1", "te2"] 13 | os.environ["PLATOON_HOSTS"] = "test0,tes1,te2" 14 | reload(cfgp) 15 | hosts = cfgp.fetch_hosts() 16 | assert hosts == true_hosts, (hosts) 17 | 18 | 19 | def test_fetch_hosts_from_rc(): 20 | if os.getenv("PLATOON_HOSTS"): 21 | os.environ.pop("PLATOON_HOSTS") 22 | os.environ["PLATOONRC"] = "../../../platoonrc.conf" 23 | reload(cfgp) 24 | hosts = cfgp.fetch_hosts() 25 | assert hosts == ["lisa0", "lisa1", "lisa3"], (hosts) 26 | 27 | 28 | def test_fetch_devices_from_envs(): 29 | if os.getenv("PLATOON_DEVICES"): 30 | os.environ.pop("PLATOON_DEVICES") 31 | os.environ["PLATOON_DEVICES"] = "cuda0,opencl0:1" 32 | reload(cfgp) 33 | devices = cfgp.fetch_devices_for_host("asfasfa") 34 | assert devices == ["cuda0", "opencl0:1"], (devices) 35 | 36 | 37 | def test_fetch_devices_from_rc(): 38 | if os.getenv("PLATOON_DEVICES"): 39 | os.environ.pop("PLATOON_DEVICES") 40 | os.environ["PLATOON_DEVICES"] = "" 41 | os.environ["PLATOONRC"] = "../../../platoonrc.conf" 42 | reload(cfgp) 43 | devs = cfgp.fetch_devices_for_host("lisa0") 44 | assert devs == ["cuda0", "cuda1"], (devs) 45 | devs = cfgp.fetch_devices_for_host("lisa1") 46 | assert devs == ["cuda3", "cuda0"], (devs) 47 | devs = cfgp.fetch_devices_for_host("lisa3") 48 | assert devs == ["cuda"], (devs) 49 | keyerror = False 50 | try: 51 | devs = cfgp.fetch_devices_for_host("asfasfa") 52 | except KeyError: 53 | keyerror = True 54 | except: 55 | pass 56 | assert keyerror 57 | -------------------------------------------------------------------------------- /example/lstm/README.txt: -------------------------------------------------------------------------------- 1 | ## GOAL ## 2 | LSTM example using Platoon *param sync* interface 3 | 4 | 5 | ## CONTENT ## 6 | - README.txt 7 | - lstm_controller.py 8 | - lstm_worker.py 9 | - imdb.py 10 | 11 | 12 | ## HOW TO USE ## 13 | # USING THE LAUNCHER 14 | 1) Assuming you are in the lstm folder. 15 | `cd platoon/example/lstm/` 16 | 17 | 1) Launch the experiment on 2 GPUs using the platoon-launcher script. 18 | `platoon-launcher lstm -D cuda0 cuda2` 19 | 20 | To see all controller parameters do: `python lstm_controller.py -h` 21 | To pass them via the platoon-launcher script: `platoon-launcher lstm -D cuda0 cuda2 -c=...` 22 | 23 | To see all worker parameters do: `python lstm_worker.py -h` 24 | To pass them via the platoon-launcher script: `platoon-launcher lstm -D cuda0 cuda2 -w=...` 25 | 26 | # MANUALLY 27 | 1) Assuming you are in the lstm folder. 28 | `cd platoon/example/lstm/` 29 | 30 | 2) Start the controller. 31 | `THEANO_FLAGS='device=cpu' python -u lstm_controller.py` 32 | 33 | 3) Start the worker. Repeat as needed changing the GPU id. 34 | `THEANO_FLAGS='device=gpu0' python -u lstm_worker.py` 35 | 36 | 37 | ## NOTE ## 38 | If you use the MANUAL way, you may want to run them in different windows of screen or tmux. 39 | They all expect to be in the foreground. 40 | 41 | 42 | ## Timing ## 43 | This timing was done with 2 k80. 44 | The timing is about efficiency of computation, not efficiency of 45 | training. So the parameter alpha is constant. The number of mini-batches 46 | is fixed as the hyper-parameter. The sync is also fixed to be after 10 47 | mini-batch of computation. 48 | 49 | With 1 worker, Platoon does not give you any advantage. This is 50 | there just to show the overhead of the EASGD implementation. 
Normal 51 | is without this framework and with SGD, also there for overhead evaluation. 52 | 53 | Normal | 1 GPU | 2 GPUs | 3 GPUs | 4 GPUs 54 | -------|-------|--------|--------|------- 55 | 870s | 912s | 477s | 329s | 254s 56 | 1.00x | 0.95x | 1.82x | 2.65x | 3.42x 57 | -------------------------------------------------------------------------------- /example/synchronous_lstm/README.txt: -------------------------------------------------------------------------------- 1 | ## GOAL ## 2 | LSTM example using Platoon *all reduce* interface 3 | 4 | 5 | ## CONTENT ## 6 | - README.txt 7 | - lstm_controller.py 8 | - lstm_worker.py 9 | - imdb.py 10 | It is assumed that imdb.pkl is in the same foler, otherwise it will be downloaded. 11 | 12 | 13 | ## HOW TO USE ## 14 | # USING THE LAUNCHER 15 | When the launcher is used, the outputs and errors of controller and workers are automatically 16 | stored in an auto-generated folder of PLATOON_LOGS/lstm/DATE_TIME. 17 | 18 | 1) Assuming you are in the synchronous_lstm folder. 19 | `cd platoon/example/synchronous_lstm/` 20 | 21 | 2) Launch the experiment on 2 GPUs using the platoon-launcher script. 22 | `platoon-launcher lstm -D cuda0 cuda1` 23 | 24 | To see all controller parameters do: `python lstm_controller.py -h` 25 | To pass them via the platoon-launcher script: `platoon-launcher lstm -D cuda0 cuda1 -c=...` 26 | 27 | To see all worker parameters do: `python lstm_worker.py -h` 28 | To pass them via the platoon-launcher script: `platoon-launcher lstm -D cuda0 cuda1 -w=...` 29 | 30 | 31 | For setting THEANO_FLAGS for the workers, you can use the 32 | following command which sets floatX to float32 for all the workers: 33 | `THEANO_FLAGS=floatX=float32 platoon-launcher lstm -D cuda0 cuda1` 34 | 35 | # USING THE SCRIPTS 36 | When the scripts are used the path to store the outputs can be given. 37 | 38 | 1) Assuming you are in the synchronous_lstm folder. 39 | `cd platoon/example/synchronous_lstm/` 40 | 41 | 2) Launch the experiment. Platoon will automatically find all the available GPUs 42 | and run the workers on them: 43 | THEANO_FLAGS=floatX=float32 python lstm_controller.py --single lstm PATH/TO/OUTPUT 44 | 45 | --single indicates the GPUs are all on the same machine. 46 | lstm is the name of the experiment. It will look for an lstm_worker.py to run the workers. 47 | THEANO_FLAGS are set for all the workers and not the controller. The controller should use 48 | the CPU. 49 | 50 | 51 | ## TIMING ## 52 | These timings were done using the Nvidia DGX-1 and by averaging results from 53 | two runs for each setup. 
54 | 1 GPU : 5.698 seconds / epoch 55 | 2 GPU : 2.230 seconds / epoch 56 | -------------------------------------------------------------------------------- /example/simple_batched_pixel_sum/batched_pixel_sum.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import gzip 3 | from six.moves import cPickle 4 | import numpy as np 5 | from numpy.testing import assert_allclose 6 | import theano 7 | from theano import tensor as T 8 | from theano.compat.python2x import OrderedDict 9 | 10 | 11 | class BatchedPixelSum(object): 12 | 13 | def __init__(self, dataset, batch_size): 14 | self._batch_size = batch_size 15 | self._dataset = dataset 16 | 17 | self._computed_sum = theano.shared(value=np.zeros(dataset.shape[1], dtype=theano.config.floatX), name='sum', borrow=True) 18 | 19 | input = T.matrix(dtype=theano.config.floatX) 20 | batch_sum = T.sum(input, axis=0, dtype=theano.config.floatX) 21 | 22 | updates = OrderedDict() 23 | updates[self._computed_sum] = (self._computed_sum + batch_sum) 24 | 25 | self._update_sum = theano.function(name='learn', 26 | inputs=[input], 27 | updates=updates) 28 | 29 | def get_sum(self): 30 | for i in xrange(self._dataset.shape[0]/self._batch_size): 31 | batch_start = i*self._batch_size 32 | batch_stop = (i + 1)*self._batch_size 33 | print("Summing from {} to {}.".format(batch_start, batch_stop)) 34 | self._update_sum(self._dataset[batch_start:batch_stop]) 35 | return self._computed_sum.get_value() 36 | 37 | 38 | def parse_arguments(): 39 | import argparse 40 | 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('--batch-size', default=1000, type=int, required=False, help='Size of the batches.') 43 | 44 | return parser.parse_args() 45 | 46 | 47 | def get_mnist(path): 48 | import os 49 | import urllib 50 | 51 | if not os.path.exists(path): 52 | print("Downloading mnist ...", end=' ') 53 | url = "http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz" 54 | 55 | urllib.urlretrieve(url, path) 56 | print("Done") 57 | 58 | if __name__ == '__main__': 59 | args = parse_arguments() 60 | 61 | mnist_path = "../data/mnist.pkl.gz" 62 | 63 | get_mnist(mnist_path) 64 | 65 | with gzip.open(mnist_path, 'rb') as f: 66 | train_set, _, _ = cPickle.load(f) 67 | 68 | bps = BatchedPixelSum(train_set[0], args.batch_size) 69 | 70 | computed_sum = bps.get_sum() 71 | 72 | # Get actual answer for testing 73 | real_sum = train_set[0].sum(axis=0, dtype=theano.config.floatX) 74 | assert_allclose(computed_sum, real_sum) 75 | -------------------------------------------------------------------------------- /platoonrc.conf: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Platoon Configuration Options # 3 | ################################################################################ 4 | # 5 | # Configuring hosts for multi-node training (in decreasing order of priority): 6 | # 1. Use `-H` option of `platoon2-launcher`. 7 | # 2. Use `PLATOON_HOSTS` environmental variable to be a list of comma-separated 8 | # hostnames. 9 | # e.g. PLATOON_HOSTS="lisa1,lisa3" 10 | # 3. Use `PLATOONRC` environmental variable to point to paths of 11 | # configuration files, like this. Files have decreasing order of priority 12 | # from right to left. 13 | # e.g. PLATOONRC="~/platoon.conf.d/morn.conf:~/platoon.conf.d/even.conf" 14 | # 4. Use of `./.platoonrc`. 15 | # 5. Use of `~/.platoonrc`. 
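#
# As a small illustration (the complete template is at the bottom of this
# file), a minimal .platoonrc that only names the participating hosts could
# look like:
#
#   [platoon]
#   hosts : lisa0,lisa1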
16 | # 17 | # If no hosts can be infered, then single-node training is assumed on the host 18 | # on which `platoon2-launcher` is executed. If a single host can be infered, 19 | # then that host must be the one on which `platoon2-launcher` is executed. 20 | # Currently, starting single-node training on other hosts, than the one which 21 | # `platoon2-launcher` is executed, is not supported. 22 | # 23 | # Configuring devices for multi-gpu/node training (in decreasing order of 24 | # priority): 25 | # 1. Use `-D` option of `platoon2-launcher` [NOTE: for single-node training!] 26 | # 2. Use `PLATOON_DEVICES` environmental variable to be a list of 27 | # comma-separated Theano device names [NOTE: Must be set separately for every 28 | # host for multi-node] 29 | # e.g. PLATOON_DEVICES="cuda0,cuda3" 30 | # 3. Use `PLATOONRC` environmental variable to point to paths of 31 | # configuration files, like this. Files have decreasing order of priority 32 | # from right to left. 33 | # e.g. PLATOONRC="~/platoon.conf.d/morn.conf:~/platoon.conf.d/even.conf" 34 | # 4. Use of `./.platoonrc` 35 | # 5. Use of `~/.platoonrc` 36 | # 37 | # If no devices can be inferred from the above, then a query to use all 38 | # compatible devices (currently CUDA GPUs) on a host will start using pygpu 39 | # interface, if available. If this fails (e.g. due to absence of pygpu package), 40 | # an error will be reported and processes will exit. 41 | # 42 | # This file serves as a template for configuring Platoon through a .platoonrc 43 | # file. 44 | # 45 | 46 | # Three hosts: lisa0, lisa1, lisa3 47 | [platoon] 48 | hosts : lisa0 49 | lisa1, lisa3 50 | 51 | # Use cuda0 and cuda1 on lisa0, cuda3 and cuda0 on lisa1, cuda on lisa3 52 | [devices] 53 | lisa0 : cuda0 54 | cuda1 55 | lisa1 : cuda3,cuda0 56 | lisa3 : cuda 57 | -------------------------------------------------------------------------------- /platoon/tests/unit/test_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | from six.moves import reload_module as reload 4 | 5 | import numpy as np 6 | 7 | from ... 
import util 8 | 9 | try: 10 | from mpi4py import MPI 11 | MPI_IMPORTED = True 12 | except: 13 | MPI_IMPORTED = False 14 | 15 | 16 | class TestOpToMPI(unittest.TestCase): 17 | @unittest.skipUnless(MPI_IMPORTED, "Needs mpi4py module") 18 | def test_op_to_mpi(self): 19 | reload(util) 20 | assert util.op_to_mpi('+') == MPI.SUM 21 | assert util.op_to_mpi("sum") == MPI.SUM 22 | assert util.op_to_mpi("add") == MPI.SUM 23 | assert util.op_to_mpi('*') == MPI.PROD 24 | assert util.op_to_mpi("prod") == MPI.PROD 25 | assert util.op_to_mpi("product") == MPI.PROD 26 | assert util.op_to_mpi("mul") == MPI.PROD 27 | assert util.op_to_mpi("max") == MPI.MAX 28 | assert util.op_to_mpi("maximum") == MPI.MAX 29 | assert util.op_to_mpi("min") == MPI.MIN 30 | assert util.op_to_mpi("minimum") == MPI.MIN 31 | 32 | def test_op_to_mpi_import_fail(self): 33 | util.MPI = None 34 | with self.assertRaises(AttributeError): 35 | util.op_to_mpi('+') 36 | 37 | @unittest.skipUnless(MPI_IMPORTED, "Needs mpi4py module") 38 | def test_op_to_mpi_op_fail(self): 39 | reload(util) 40 | with self.assertRaises(ValueError): 41 | util.op_to_mpi('asdfasfda') 42 | with self.assertRaises(ValueError): 43 | util.op_to_mpi('-') 44 | 45 | 46 | class TestDtypeToMPI(unittest.TestCase): 47 | @unittest.skipUnless(MPI_IMPORTED, "Needs mpi4py module") 48 | def test_dtype_to_mpi(self): 49 | reload(util) 50 | assert util.dtype_to_mpi(np.dtype('bool')) == MPI.C_BOOL 51 | assert util.dtype_to_mpi(np.dtype('int8')) == MPI.INT8_T 52 | assert util.dtype_to_mpi(np.dtype('uint8')) == MPI.UINT8_T 53 | assert util.dtype_to_mpi(np.dtype('int16')) == MPI.INT16_T 54 | assert util.dtype_to_mpi(np.dtype('uint16')) == MPI.UINT16_T 55 | assert util.dtype_to_mpi(np.dtype('int32')) == MPI.INT32_T 56 | assert util.dtype_to_mpi(np.dtype('uint32')) == MPI.UINT32_T 57 | assert util.dtype_to_mpi(np.dtype('int64')) == MPI.INT64_T 58 | assert util.dtype_to_mpi(np.dtype('uint64')) == MPI.UINT64_T 59 | assert util.dtype_to_mpi(np.dtype('float32')) == MPI.FLOAT 60 | assert util.dtype_to_mpi(np.dtype('float64')) == MPI.DOUBLE 61 | assert util.dtype_to_mpi(np.dtype('complex64')) == MPI.C_FLOAT_COMPLEX 62 | assert util.dtype_to_mpi(np.dtype('complex128')) == MPI.C_DOUBLE_COMPLEX 63 | 64 | def test_dtype_to_mpi_import_fail(self): 65 | util.MPI = None 66 | with self.assertRaises(AttributeError): 67 | util.dtype_to_mpi('int8') 68 | 69 | @unittest.skipUnless(MPI_IMPORTED, "Needs mpi4py module") 70 | def test_dtype_to_mpi_dtype_fail(self): 71 | reload(util) 72 | with self.assertRaises(TypeError): 73 | util.dtype_to_mpi('sadfa') 74 | with self.assertRaises(TypeError): 75 | util.dtype_to_mpi('') 76 | # TODO Find how to convert from half type to MPI dtype 77 | # and use in collectives 78 | with self.assertRaises(TypeError): 79 | util.dtype_to_mpi('float16') 80 | -------------------------------------------------------------------------------- /platoon/tests/functional/time_worker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | import os 3 | import sys 4 | import cProfile 5 | import pstats 6 | from timeit import default_timer as timer 7 | 8 | from six.moves import range 9 | 10 | from pygpu import gpuarray 11 | import numpy as np 12 | from numpy.testing import assert_allclose 13 | 14 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..')) 15 | from platoon import Worker 16 | 17 | SEED = 567 18 | np.random.seed(SEED) 19 | 20 | worker = Worker(control_port=5567) 21 
| 22 | 23 | def profile(shape=(1000, 1000), dtype='float64', rng=(-1, 1)): 24 | print("\n### Profiling worker") 25 | print() 26 | print("### shape =", shape) 27 | print("### dtype =", dtype) 28 | print("### range =", sorted(rng)) 29 | 30 | rang = abs(rng[1] - rng[0]) 31 | inp = np.random.random(shape) * rang + min(rng) 32 | inp = inp.astype(dtype) 33 | sinp = gpuarray.asarray(inp, context=worker.gpuctx) 34 | out = np.empty_like(inp) 35 | sout = gpuarray.asarray(out, context=worker.gpuctx) 36 | 37 | print("\n### Profiling worker.all_reduce") 38 | print("## First call to worker.all_reduce") 39 | cProfile.runctx("worker.all_reduce(sinp, '+', sout)", globals(), locals(), 40 | filename="worker.prof") 41 | s = pstats.Stats("worker.prof") 42 | s.strip_dirs().sort_stats("time").print_stats() 43 | assert_allclose(inp * worker.global_size, np.asarray(sout)) 44 | 45 | print("## Second call to worker.all_reduce") 46 | cProfile.runctx("worker.all_reduce(sinp, '+', sout)", globals(), locals(), 47 | filename="worker.prof") 48 | s = pstats.Stats("worker.prof") 49 | s.strip_dirs().sort_stats("time").print_stats() 50 | assert_allclose(inp * worker.global_size, np.asarray(sout)) 51 | if worker._multinode: 52 | print("## Note that there must be difference between the first and") 53 | print("## the second call as a result of the extra call to worker.shared") 54 | print("## during the first time.") 55 | 56 | 57 | def benchmark(shape=(1000, 1000), dtype='float64', rng=(-1, 1), number=10): 58 | print("\n### Benchmarking worker") 59 | print() 60 | print("### shape =", shape) 61 | print("### dtype =", dtype) 62 | print("### range =", sorted(rng)) 63 | print("### num of iterations =", number) 64 | 65 | rang = abs(rng[1] - rng[0]) 66 | inp = np.random.random(shape) * rang + min(rng) 67 | inp = inp.astype(dtype) 68 | sinp = gpuarray.asarray(inp, context=worker.gpuctx) 69 | out = np.empty_like(inp) 70 | sout = gpuarray.asarray(out, context=worker.gpuctx) 71 | 72 | print("\n## Benchmarking worker.shared") 73 | print("# First call") 74 | start = timer() 75 | worker.shared(sinp) 76 | end = timer() 77 | print("Time:", end - start) 78 | print("# Second call") 79 | start = timer() 80 | worker.shared(sinp) 81 | end = timer() 82 | print("Time:", end - start) 83 | 84 | print("\n## Benchmarking worker.all_reduce") 85 | print("# Timing worker.all_reduce w/o calls to worker.shared") 86 | ttime = 0 87 | for _ in range(number): 88 | start = timer() 89 | worker.all_reduce(sinp, '+', sout) 90 | end = timer() 91 | ttime += end - start 92 | assert_allclose(inp * worker.global_size, np.asarray(sout)) 93 | print("Mean time:", ttime / number) 94 | 95 | 96 | if __name__ == '__main__': 97 | try: 98 | benchmark() 99 | profile() 100 | except Exception as exc: 101 | print(exc, file=sys.stderr) 102 | finally: 103 | worker.close() 104 | -------------------------------------------------------------------------------- /platoon/configparser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shlex 3 | import sys 4 | 5 | from six.moves import configparser as ConfigParser 6 | 7 | # The PLATOON_DEVICES environment variable should be a list of comma-separated 8 | # device name entries, e.g. PLATOON_DEVICES=cuda0,cuda2,cuda3 9 | PLATOON_DEVICES = os.getenv("PLATOON_DEVICES", "") 10 | 11 | # The PLATOON_HOSTS environment variable should be a list of comma-separated 12 | # host machine entries, e.g. 
PLATOON_HOSTS=lisa1,ceylon 13 | PLATOON_HOSTS = os.getenv("PLATOON_HOSTS", "") 14 | 15 | 16 | def config_files_from_platoonrc(): 17 | if sys.platform != "win32": 18 | rval = [os.path.expanduser('~/.platoonrc')] 19 | rval.append(os.path.join(os.getcwd(), '.platoonrc')) 20 | else: 21 | rval = [os.path.expanduser('~/.platoonrc.txt')] 22 | rval.append(os.path.join(os.getcwd(), '.platoonrc.txt')) 23 | if os.getenv('PLATOONRC') is not None: 24 | rval.extend([os.path.expanduser(s) for s in 25 | os.getenv('PLATOONRC').split(os.pathsep)]) 26 | return rval 27 | 28 | config_files = config_files_from_platoonrc() 29 | platoon_cfg = ConfigParser.SafeConfigParser( 30 | {'USER': os.getenv("USER", os.path.split(os.path.expanduser('~'))[-1]), 31 | 'LSCRATCH': os.getenv("LSCRATCH", ""), 32 | 'TMPDIR': os.getenv("TMPDIR", ""), 33 | 'TEMP': os.getenv("TEMP", ""), 34 | 'TMP': os.getenv("TMP", ""), 35 | 'PID': str(os.getpid()), 36 | } 37 | ) 38 | platoon_cfg.optionxform = str 39 | platoon_cfg.read(config_files) 40 | # Having a raw version of the config around as well enables us to pass 41 | # through config values that contain format strings. 42 | # The time required to parse the config twice is negligible. 43 | platoon_raw_cfg = ConfigParser.RawConfigParser() 44 | platoon_raw_cfg.optionxform = str 45 | platoon_raw_cfg.read(config_files) 46 | 47 | 48 | def fetch_devices_for_host(host): 49 | """A successful search returns a list of theano devices' string values. 50 | An unsuccessful search raises a KeyError. 51 | 52 | The (decreasing) priority order is: 53 | - PLATOON_DEVICES 54 | - PLATOONRC files (if they exist) from right to left 55 | - working directory's ./.platoonrc 56 | - ~/.platoonrc 57 | 58 | """ 59 | # first try to have PLATOON_DEVICES 60 | if PLATOON_DEVICES: 61 | splitter = shlex.shlex(PLATOON_DEVICES, posix=True) 62 | splitter.whitespace += ',' 63 | splitter.whitespace_split = True 64 | return list(splitter) 65 | 66 | # next try to find it in the config file 67 | try: 68 | try: 69 | devices = platoon_cfg.get("devices", host) 70 | except ConfigParser.InterpolationError: 71 | devices = platoon_raw_cfg.get("devices", host) 72 | except (ConfigParser.NoOptionError, ConfigParser.NoSectionError): 73 | raise KeyError(host) 74 | splitter = shlex.shlex(devices, posix=True) 75 | splitter.whitespace += ',' 76 | splitter.whitespace_split = True 77 | return list(splitter) 78 | 79 | 80 | def fetch_hosts(): 81 | """A successful search returns a list of host to participate in a multi-node 82 | platoon. An unsuccessful search raises a KeyError. 
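    For example, with the template platoonrc.conf at the repository root
    (hosts lisa0, lisa1 and lisa3), this returns ['lisa0', 'lisa1', 'lisa3'],
    which is what tests/unit/test_configparser.py expects.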
83 | 84 | The (decreasing) priority order is: 85 | - PLATOON_HOSTS 86 | - PLATOONRC files (if they exist) from right to left 87 | - working directory's ./.platoonrc 88 | - ~/.platoonrc 89 | 90 | """ 91 | # first try to have PLATOON_HOSTS 92 | if PLATOON_HOSTS: 93 | splitter = shlex.shlex(PLATOON_HOSTS, posix=True) 94 | splitter.whitespace += ',' 95 | splitter.whitespace_split = True 96 | return list(splitter) 97 | 98 | # next try to find it in the config file 99 | try: 100 | try: 101 | hosts = platoon_cfg.get("platoon", "hosts") 102 | except ConfigParser.InterpolationError: 103 | hosts = platoon_raw_cfg.get("platoon", "hosts") 104 | except (ConfigParser.NoOptionError, ConfigParser.NoSectionError): 105 | raise KeyError("hosts") 106 | splitter = shlex.shlex(hosts, posix=True) 107 | splitter.whitespace += ',' 108 | splitter.whitespace_split = True 109 | return list(splitter) 110 | -------------------------------------------------------------------------------- /example/simple_batched_pixel_sum/batched_pixel_sum_worker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import os 3 | import sys 4 | import gzip 5 | import six 6 | from six.moves import cPickle 7 | 8 | import numpy as np 9 | from numpy.testing import assert_allclose 10 | 11 | import theano 12 | from theano import tensor as T 13 | from theano.compat.python2x import OrderedDict 14 | 15 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 16 | from platoon.channel import Worker 17 | from platoon.param_sync import ParamSyncRule 18 | 19 | 20 | class SUMSync(ParamSyncRule): 21 | 22 | def update_params(self, local_params, master_params): 23 | """ 24 | Update the master params and reset to local params. 
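        The worker's partial sum is added into the master copy and the local
        buffer is then zeroed, so each processed batch contributes to the
        global sum exactly once across successive syncs.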
25 | """ 26 | master_params[0] += local_params[0] 27 | local_params[0].fill(0) 28 | 29 | 30 | class BatchedPixelSum(object): 31 | 32 | def __init__(self, control_port, batch_port): 33 | self._worker = Worker(control_port=control_port, data_port=batch_port) 34 | 35 | data_shape = self._worker.send_req('get_data_shape') 36 | 37 | self._computed_sum = theano.shared( 38 | value=np.zeros(data_shape, dtype=theano.config.floatX), 39 | name='sum', borrow=True) 40 | 41 | self._worker.init_shared_params(params=[self._computed_sum], 42 | param_sync_rule=SUMSync()) 43 | 44 | input = T.matrix(dtype=theano.config.floatX) 45 | batch_sum = T.sum(input, axis=0, dtype=theano.config.floatX) 46 | 47 | updates = OrderedDict() 48 | updates[self._computed_sum] = (self._computed_sum + batch_sum) 49 | 50 | self._update_sum = theano.function(name='learn', 51 | inputs=[input], 52 | updates=updates) 53 | 54 | def get_sum(self): 55 | nb_batches_before_sync = 10 56 | 57 | while True: 58 | step = self._worker.send_req('next') 59 | print("# Command received: {}".format(step)) 60 | 61 | if step == 'train': 62 | print("# Training", end=' ') 63 | # TODO: Having a fix number of MB before sync can cause 64 | # problems 65 | for i in range(nb_batches_before_sync): 66 | data = np.asarray(self._worker.recv_mb()) 67 | print(".", end=' ') 68 | self._update_sum(data) 69 | print("Done") 70 | import time 71 | time.sleep(1) 72 | step = self._worker.send_req('done', 73 | dict(num_batches=nb_batches_before_sync)) 74 | 75 | print("Syncing with global params.") 76 | self._worker.sync_params(synchronous=True) 77 | 78 | if step == 'stop': 79 | break 80 | 81 | print("All computation done.") 82 | return self._worker.shared_params[0] # Return global params 83 | 84 | 85 | def parse_arguments(): 86 | import argparse 87 | 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument('--batch_port', default=5566, type=int, required=False, 90 | help='Port on which the batches will be transfered.') 91 | parser.add_argument('--control_port', default=5567, type=int, 92 | required=False, help='Port on which the control ' 93 | 'commands will be sent.') 94 | 95 | return parser.parse_args() 96 | 97 | if __name__ == '__main__': 98 | args = parse_arguments() 99 | 100 | print("Init ...", end=' ') 101 | bps = BatchedPixelSum(control_port=args.control_port, 102 | batch_port=args.batch_port) 103 | print("Done") 104 | 105 | computed_sum = bps.get_sum() 106 | 107 | # Get actual answer for testing 108 | with gzip.open("../data/mnist.pkl.gz", 'rb') as f: 109 | kwargs = {} 110 | if six.PY3: 111 | kwargs['encoding'] = 'latin1' 112 | train_set, _, _ = cPickle.load(f, **kwargs) 113 | real_sum = train_set[0].sum(axis=0, dtype=theano.config.floatX) 114 | assert_allclose(computed_sum, real_sum) 115 | -------------------------------------------------------------------------------- /example/simple_batched_pixel_sum/batched_pixel_sum_controller.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import os 3 | import sys 4 | import gzip 5 | import time 6 | import six 7 | from six.moves import cPickle 8 | from multiprocessing import Process 9 | 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 11 | from platoon.channel import Controller 12 | 13 | 14 | class BatchedPixelSumController(Controller): 15 | 16 | def __init__(self, batch_port, dataset, batch_size, default_args): 17 | super(BatchedPixelSumController, self).__init__(**default_args) 18 | # The data 
socket should be initialized in the process that will handle 19 | # the batch. 20 | # That is why it's not initialized in the parent constructor. Second 21 | # param = None 22 | self._batch_port = batch_port 23 | 24 | self._start_time = None 25 | self._should_stop = False 26 | 27 | self._batch_size = batch_size 28 | self._dataset = dataset 29 | 30 | self._nb_batch_processed = 0 31 | self._nb_batch_to_process = (dataset.shape[0] // batch_size) 32 | 33 | def start_batch_server(self): 34 | self.p = Process(target=self._send_mb) 35 | self.p.start() 36 | 37 | def _send_mb(self): 38 | self.init_data(self._batch_port) 39 | 40 | for i in range(self._dataset.shape[0] // self._batch_size): 41 | batch_start = i * self._batch_size 42 | batch_stop = (i + 1) * self._batch_size 43 | self.send_mb(self._dataset[batch_start:batch_stop]) 44 | 45 | self.asocket.close() 46 | print("Done Sending MB.") 47 | 48 | # TODO: Find a solution for this 49 | # Sleeping to give the chance to the worker to empty the queue before 50 | # the MB process dies 51 | import time 52 | time.sleep(2) 53 | 54 | def handle_control(self, req, worker_id, req_info): 55 | print("# Handling req: {}".format(req)) 56 | control_response = '' 57 | 58 | if req == 'next': 59 | if not self._should_stop: 60 | # Start a global execution timer 61 | if self._start_time is None: 62 | self._start_time = time.time() 63 | control_response = 'train' 64 | else: 65 | control_response = 'stop' 66 | elif req == 'get_data_shape': 67 | control_response = self._dataset[0].shape 68 | elif req == 'done': 69 | self._nb_batch_processed += req_info['num_batches'] 70 | print("{} batches processed by worker so far." 71 | .format(self._nb_batch_processed)) 72 | 73 | if self._nb_batch_processed >= self._nb_batch_to_process: 74 | if not self._should_stop: 75 | print("Training time {:.4f}s".format( 76 | time.time() - self._start_time)) 77 | self._should_stop = True 78 | 79 | return control_response 80 | 81 | 82 | def parse_arguments(): 83 | parser = Controller.default_parser() 84 | parser.add_argument('--batch_port', default=5566, type=int, required=False, 85 | help='Port on which the batches will be transfered.') 86 | parser.add_argument('--batch-size', default=1000, type=int, required=False, 87 | help='Size of the batches.') 88 | 89 | return parser.parse_args() 90 | 91 | 92 | def get_mnist(path): 93 | import os 94 | from six.moves import urllib 95 | 96 | if not os.path.exists(path): 97 | print("Downloading mnist ...", end=' ') 98 | url = "http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz" 99 | 100 | urllib.request.urlretrieve(url, path) 101 | print("Done") 102 | 103 | 104 | def spawn_controller(): 105 | args = parse_arguments() 106 | 107 | mnist_path = "../data/mnist.pkl.gz" 108 | 109 | get_mnist(mnist_path) 110 | 111 | with gzip.open(mnist_path, 'rb') as f: 112 | kwargs = {} 113 | if six.PY3: 114 | kwargs['encoding'] = 'latin1' 115 | train_set, _, _ = cPickle.load(f, **kwargs) 116 | 117 | controller = BatchedPixelSumController(batch_port=args.batch_port, 118 | dataset=train_set[0], 119 | batch_size=args.batch_size, 120 | default_args=Controller.default_arguments(args)) 121 | controller.start_batch_server() 122 | return controller.serve() 123 | 124 | if __name__ == '__main__': 125 | rcode = spawn_controller() 126 | if rcode != 0: 127 | sys.exit(rcode) 128 | -------------------------------------------------------------------------------- /platoon/tests/functional/test_worker.py: -------------------------------------------------------------------------------- 
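# Functional test for the all_reduce worker interface. As described in the
# README.md next to this file, export PLATOON_TEST_WORKERS_NUM with the total
# number of workers to be spawned and launch the test with
# `platoon-launcher test` from this directory.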
1 | from __future__ import absolute_import, print_function 2 | import os 3 | import sys 4 | 5 | import unittest 6 | 7 | from pygpu import gpuarray 8 | import numpy as np 9 | 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..')) 11 | from platoon import Worker 12 | 13 | 14 | class TestWorker(unittest.TestCase): 15 | @classmethod 16 | def setUpClass(cls): 17 | try: 18 | cls.total_nw = int(os.environ['PLATOON_TEST_WORKERS_NUM']) 19 | cls.worker = Worker(control_port=5567) 20 | cls.ctx = cls.worker.gpuctx 21 | except Exception as exc: 22 | print(exc, file=sys.stderr) 23 | raise exc 24 | 25 | def test_is_singleton(self): 26 | inst = Worker() 27 | assert inst is self.worker 28 | print("The following warning is produced by testing procedure:", file=sys.stderr) 29 | inst = Worker(123413) 30 | assert inst is self.worker 31 | 32 | def test_global_size(self): 33 | assert self.worker.global_size == self.total_nw 34 | 35 | def test_interface1(self): 36 | inp = np.arange(32, dtype='float64') 37 | sinp = gpuarray.asarray(inp, context=self.ctx) 38 | out = np.empty_like(inp) 39 | sout = gpuarray.asarray(out, context=self.ctx) 40 | self.worker.all_reduce(sinp, '+', sout) 41 | expected = self.total_nw * inp 42 | actual = np.asarray(sout) 43 | assert np.allclose(expected, actual) 44 | 45 | def test_interface2(self): 46 | inp = np.arange(32, dtype='float64') 47 | sinp = gpuarray.asarray(inp, context=self.ctx) 48 | self.worker.all_reduce(sinp, '+', sinp) 49 | expected = self.total_nw * inp 50 | actual = np.asarray(sinp) 51 | assert np.allclose(expected, actual) 52 | 53 | def test_interface3(self): 54 | inp = np.arange(32, dtype='float64') 55 | sinp = gpuarray.asarray(inp, context=self.ctx) 56 | sout = self.worker.all_reduce(sinp, '+') 57 | expected = self.total_nw * inp 58 | actual = np.asarray(sout) 59 | assert np.allclose(expected, actual) 60 | 61 | def test_linked_shared(self): 62 | inp = np.arange(32, dtype='float64') 63 | sinp = gpuarray.asarray(inp, context=self.ctx) 64 | insize = sinp.size * sinp.itemsize 65 | out = np.empty_like(inp) 66 | sout = gpuarray.asarray(out, context=self.ctx) 67 | outsize = sout.size * sout.itemsize 68 | 69 | if self.worker._multinode: 70 | try: 71 | self.worker.shared_arrays[outsize] 72 | self.fail("'sout''s size has not been linked yet to a shared buffer") 73 | except KeyError: 74 | pass 75 | try: 76 | self.worker.shared_arrays[insize] 77 | self.fail("'sinp''s size has not been linked yet to a shared buffer") 78 | except KeyError: 79 | pass 80 | 81 | self.worker.all_reduce(sinp, '+', sout) 82 | 83 | if self.worker._multinode: 84 | try: 85 | self.worker.shared_arrays[outsize] 86 | except KeyError: 87 | self.fail("`sout`'s size should have been linked to a shared buffer") 88 | try: 89 | self.worker.shared_arrays[insize] 90 | except KeyError: 91 | self.fail("`sinp`'s size should have been linked to a shared buffer") 92 | 93 | expected = self.total_nw * inp 94 | actual = np.asarray(sout) 95 | assert np.allclose(expected, actual) 96 | 97 | self.worker.all_reduce(sout, '*', sout) 98 | 99 | if self.worker._multinode: 100 | try: 101 | self.worker.shared_arrays[outsize] 102 | except KeyError: 103 | self.fail("`sout`'s size should have been linked to a shared buffer") 104 | try: 105 | self.worker.shared_arrays[insize] 106 | except KeyError: 107 | self.fail("`sinp`'s size should have been linked to a shared buffer") 108 | 109 | expected = expected ** self.total_nw 110 | actual = np.asarray(sout) 111 | assert np.allclose(expected, actual) 112 | 113 | 
@classmethod 114 | def tearDownClass(cls): 115 | cls.worker.close() 116 | 117 | if __name__ == '__main__': 118 | print("### Beginning Worker's tests...") 119 | suite = unittest.TestLoader().loadTestsFromTestCase(TestWorker) 120 | res = unittest.TextTestRunner(verbosity=1).run(suite) 121 | if len(res.failures) != 0 or len(res.errors) != 0: 122 | sys.exit(1) 123 | -------------------------------------------------------------------------------- /platoon/param_sync.py: -------------------------------------------------------------------------------- 1 | class ParamSyncRule(object): 2 | """ 3 | Abstract parameter synchronisation rule. 4 | 5 | This abstract class defines the interface that should be followed by 6 | implementations of parameter synchronization rules for distributed 7 | training. 8 | """ 9 | 10 | def make_update_function(self, local_params): 11 | """Return a function that will be called with the current value of the 12 | master parameters and should update them inplace. This 13 | function must also update the values of local_params (that are 14 | shared values) as a side effect. 15 | """ 16 | try: 17 | f = self.theano_update(local_params) 18 | 19 | def update(master_params, f=f): 20 | new_master_values = f(*master_params) 21 | for p, v in zip(master_params, new_master_values): 22 | p[:] = v 23 | except NotImplementedError: 24 | def update(master_params, local_params=local_params, 25 | update_params=self.update_params): 26 | local_param_values = [p.get_value() for p in local_params] 27 | update_params(local_param_values, master_params) 28 | for p, v in zip(local_params, local_param_values): 29 | p.set_value(v) 30 | return update 31 | 32 | def theano_update(self, local_params): 33 | """Compile and return a theano function that will update the local 34 | params and return new values for the master params. 35 | 36 | This function is preferred to update_params below. 37 | """ 38 | raise NotImplementedError() 39 | 40 | def update_params(self, local_params, master_params): 41 | """Perform an inplace update of the local and master params according 42 | to some update rule. 43 | 44 | This function need not be implemented if theano_update is 45 | overridden. 46 | 47 | """ 48 | raise NotImplementedError() 49 | 50 | 51 | class EASGD(ParamSyncRule): 52 | """ 53 | Implementation of the EASGD parameter sync rule. 54 | 55 | According to this rule, every N iterations, a worker synchronises his 56 | parameters with the master parameters. This is done by moving each set of 57 | parameters toward the other by an amount proportional to the difference 58 | between the individual params (this proportion is parametrized by `alpha`). 59 | 60 | The sync equations are as follow: 61 | diff = w_worker - w_master 62 | w_worker = w_worker - alpha * diff 63 | w_master = w_master + alpha * diff 64 | 65 | NOTE : if alpha=0 is used, there is no synchronization of the 66 | parameters meaning that each worker is independently training using SGD. 67 | 68 | This algorithm is described in more details in the following paper: 69 | http://arxiv.org/abs/1412.6651 70 | """ 71 | 72 | def __init__(self, alpha): 73 | self.set_alpha(alpha) 74 | 75 | def get_alpha(self): 76 | return self.alpha 77 | 78 | def set_alpha(self, alpha): 79 | self.alpha = alpha 80 | 81 | def theano_update(self, local_params): 82 | # Theano is imported here to avoid a strong dependancy on it. 
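        # A quick numeric check of the EASGD rule described in the class
        # docstring: with alpha = 0.5, a worker value of 4.0 and a master
        # value of 0.0 give diff = 0.5 * (4.0 - 0.0) = 2.0, so the worker
        # moves to 4.0 - 2.0 = 2.0 and the master to 0.0 + 2.0 = 2.0; with
        # this alpha the two copies meet halfway.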
83 | import theano 84 | master_inps = [l.type() for l in local_params] 85 | master_ups = [] 86 | local_ups = [] 87 | for p_local, p_master in zip(local_params, master_inps): 88 | diff = self.alpha * (p_local - p_master) 89 | local_ups.append(p_local - diff) 90 | master_ups.append(p_master + diff) 91 | return theano.function(master_inps, master_ups, 92 | updates=list(zip(local_params, local_ups))) 93 | 94 | def update_params(self, local_params, master_params): 95 | for p_local, p_master in zip(local_params, master_params): 96 | diff = self.alpha * (p_local - p_master) 97 | p_local -= diff 98 | p_master += diff 99 | 100 | 101 | class ASGD(ParamSyncRule): 102 | def theano_update(self, local_params): 103 | import theano 104 | 105 | local_vals = [p.get_value(borrow=True, return_internal_type=True) 106 | for p in local_params] 107 | master_inps = [l.type() for l in local_params] 108 | self.old_locals = [theano.shared(l) for l in local_vals] 109 | # This updates the global params with the difference between 110 | # old and current (aka the gradients). 111 | ret = [m + (p - o) for (m, p, o) in zip(master_inps, local_params, 112 | self.old_locals)] 113 | # This keeps values before the update for the local params 114 | ups = list(zip(self.old_locals, ret)) 115 | # This updates the local params to be the same as the global 116 | ups += list(zip(local_params, ret)) 117 | return theano.function(master_inps, ret, updates=ups) 118 | -------------------------------------------------------------------------------- /platoon/tests/functional/test_global_dynamics_worker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | import os 3 | import sys 4 | 5 | import unittest 6 | 7 | import theano 8 | from theano import config 9 | import numpy as np 10 | 11 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..')) 12 | from platoon.training import global_dynamics as gd 13 | from platoon.channel.worker import Worker 14 | 15 | 16 | class TestGlobalDynamicsWorker(unittest.TestCase): 17 | @classmethod 18 | def setUpClass(cls): 19 | try: 20 | cls.worker = Worker(control_port=5567) 21 | cls.total_nw = cls.worker.global_size 22 | cls.rank = cls.worker.global_rank 23 | except Exception as exc: 24 | print(exc, file=sys.stderr) 25 | raise exc 26 | 27 | def setUp(self): 28 | super(TestGlobalDynamicsWorker, self).setUp() 29 | SEED = 567 30 | np.random.seed(SEED) 31 | self.inp1 = 30 * np.random.random((8, 10, 5)).astype(config.floatX) 32 | self.sinp1 = theano.shared(self.inp1) 33 | self.inp2 = 50 * np.random.random((5, 20)).astype(config.floatX) 34 | self.sinp2 = theano.shared(self.inp2) 35 | 36 | def test_sumSGD_object(self): 37 | sumsgd = gd.SumSGD() 38 | sumsgd.make_rule(self.sinp1) 39 | sumsgd() 40 | expected = self.inp1 * self.total_nw 41 | actual = self.sinp1.get_value() 42 | assert np.allclose(expected, actual) 43 | 44 | def test_sumSGD_list(self): 45 | sumsgd = gd.SumSGD() 46 | sumsgd.make_rule([self.sinp1, self.sinp2]) 47 | sumsgd() 48 | expected = self.inp1 * self.total_nw 49 | actual = self.sinp1.get_value() 50 | assert np.allclose(expected, actual) 51 | expected = self.inp2 * self.total_nw 52 | actual = self.sinp2.get_value() 53 | assert np.allclose(expected, actual) 54 | 55 | def test_averageSGD_object(self): 56 | averagesgd = gd.AverageSGD() 57 | averagesgd.make_rule(self.sinp1) 58 | averagesgd() 59 | expected = self.inp1 60 | actual = self.sinp1.get_value() 61 | assert np.allclose(expected, 
actual) 62 | 63 | def test_averageSGD_list(self): 64 | averagesgd = gd.AverageSGD() 65 | averagesgd.make_rule([self.sinp1, self.sinp2]) 66 | averagesgd() 67 | expected = self.inp1 68 | actual = self.sinp1.get_value() 69 | assert np.allclose(expected, actual) 70 | expected = self.inp2 71 | actual = self.sinp2.get_value() 72 | assert np.allclose(expected, actual) 73 | 74 | def test_EASGD(self): 75 | lp = np.array([3, 4], dtype=config.floatX) 76 | if self.rank % 2 != 0: 77 | lp = -lp 78 | slp = theano.shared(lp) 79 | cp = np.array([0, 0], dtype=config.floatX) 80 | scp = theano.shared(cp) 81 | alpha = 0.5 82 | 83 | easgd = gd.EASGD() 84 | easgd.make_rule(slp, scp, alpha) 85 | easgd() 86 | 87 | if self.total_nw % 2 == 0: 88 | expectedcp = cp 89 | actualcp = scp.get_value() 90 | assert np.allclose(expectedcp, actualcp), (expectedcp, actualcp) 91 | expectedlp = lp / 2 92 | actuallp = slp.get_value() 93 | assert np.allclose(expectedlp, actuallp), (expectedlp, actuallp) 94 | else: 95 | expectedcp = lp / 2 96 | actualcp = scp.get_value() 97 | assert np.allclose(expectedcp, actualcp), (expectedcp, actualcp) 98 | expectedlp = lp / 2 99 | actuallp = slp.get_value() 100 | assert np.allclose(expectedlp, actuallp), (expectedlp, actuallp) 101 | 102 | def test_Downpour(self): 103 | lp = np.random.random((2,)).astype(config.floatX) 104 | slp = theano.shared(lp) 105 | gp = np.array([0, 1], dtype=config.floatX) 106 | sgp = theano.shared(gp) 107 | lau = (self.rank + 1) * np.array([1, 1], dtype=config.floatX) 108 | slau = theano.shared(lau) 109 | 110 | downpour = gd.Downpour() 111 | downpour.make_rule(slp, slau, sgp) 112 | downpour() 113 | 114 | expected = np.array([0, 0], dtype=config.floatX) 115 | actual = slau.get_value() 116 | assert np.allclose(expected, actual), (expected, actual) 117 | expected = sum(np.arange(self.total_nw + 1)) * np.array([1, 1], dtype=config.floatX) 118 | expected += np.array([0, 1], dtype=config.floatX) 119 | actual = sgp.get_value() 120 | assert np.allclose(expected, actual), (expected, actual) 121 | actual = slp.get_value() 122 | assert np.allclose(expected, actual), (expected, actual) 123 | 124 | @classmethod 125 | def tearDownClass(cls): 126 | cls.worker.close() 127 | 128 | if __name__ == '__main__': 129 | suite = unittest.TestLoader().loadTestsFromTestCase(TestGlobalDynamicsWorker) 130 | res = unittest.TextTestRunner(verbosity=1).run(suite) 131 | if len(res.failures) != 0 or len(res.errors) != 0: 132 | sys.exit(1) 133 | -------------------------------------------------------------------------------- /example/lstm/lstm_controller.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import os 3 | import sys 4 | import time 5 | 6 | import numpy 7 | 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 9 | from platoon.channel import Controller 10 | 11 | 12 | class LSTMController(Controller): 13 | """ 14 | This multi-process controller implements patience-based early-stopping SGD 15 | """ 16 | 17 | def __init__(self, max_mb, patience, valid_freq, default_args): 18 | """ 19 | Initialize the LSTMController 20 | 21 | Parameters 22 | ---------- 23 | max_mb : int 24 | Max number of minibatches to train on. 25 | patience: : int 26 | Training stops when this many minibatches have been trained on 27 | without any reported improvement. 28 | valid_freq : int 29 | Number of minibatches to train on between every monitoring step. 
30 | default_args : dict 31 | Arguments of default class Controller 32 | """ 33 | 34 | super(LSTMController, self).__init__(**default_args) 35 | self.patience = patience 36 | self.max_mb = int(max_mb) 37 | 38 | self.valid_freq = valid_freq 39 | self.uidx = 0 40 | self.eidx = 0 41 | self.history_errs = [] 42 | self.bad_counter = 0 43 | 44 | self.valid = False 45 | self.start_time = None 46 | self._should_stop = False 47 | 48 | def handle_control(self, req, worker_id, req_info): 49 | """ 50 | Handles a control_request received from a worker 51 | 52 | Parameters 53 | ---------- 54 | req : str or dict 55 | Control request received from a worker. 56 | The control request can be one of the following 57 | 1) "next" : request by a worker to be informed of its next action 58 | to perform. The answers from the server can be 'train' (the 59 | worker should keep training on its training data), 'valid' (the 60 | worker should perform monitoring on its validation set and test 61 | set) or 'stop' (the worker should stop training). 62 | 2) dict of format {"done":N} : used by a worker to inform the 63 | server that is has performed N more training iterations and 64 | synced its parameters. The server will respond 'stop' if the 65 | maximum number of training minibatches has been reached. 66 | 3) dict of format {"valid_err":x, "test_err":x2} : used by a worker 67 | to inform the server that it has performed a monitoring step 68 | and obtained the included errors on the monitoring datasets. 69 | The server will respond "best" if this is the best reported 70 | validation error so far, otherwise it will respond 'stop' if 71 | the patience has been exceeded. 72 | """ 73 | control_response = "" 74 | 75 | if req == 'next': 76 | if not self._should_stop: 77 | if self.start_time is None: 78 | self.start_time = time.time() 79 | 80 | if self.valid: 81 | self.valid = False 82 | control_response = 'valid' 83 | else: 84 | control_response = 'train' 85 | else: 86 | control_response = 'stop' 87 | elif req == 'done': 88 | self.uidx += req_info['train_len'] 89 | 90 | if numpy.mod(self.uidx, self.valid_freq) == 0: 91 | self.valid = True 92 | elif req == 'pred_errors': 93 | valid_err = req_info['valid_err'] 94 | test_err = req_info['test_err'] 95 | self.history_errs.append([valid_err, test_err]) 96 | harr = numpy.array(self.history_errs)[:, 0] 97 | 98 | if valid_err <= harr.min(): 99 | self.bad_counter = 0 100 | control_response = 'best' 101 | print("Best error valid:", valid_err, "test:", test_err) 102 | elif (len(self.history_errs) > self.patience and valid_err >= harr[:-self.patience].min()): 103 | self.bad_counter += 1 104 | 105 | if self.uidx > self.max_mb or self.bad_counter > self.patience: 106 | if not self._should_stop: 107 | print("Training time {:.4f}s".format(time.time() - self.start_time)) 108 | print("Number of samples:", self.uidx) 109 | self._should_stop = True 110 | 111 | return control_response 112 | 113 | 114 | def lstm_control(saveFreq=1110, saveto=None): 115 | parser = Controller.default_parser() 116 | parser.add_argument('--max-mb', default=((5000 * 1998) / 10), type=int, 117 | required=False, help='Maximum mini-batches to train upon in total.') 118 | parser.add_argument('--patience', default=10, type=int, 119 | required=False, help='Maximum patience when failing to get better validation results.') 120 | parser.add_argument('--valid-freq', default=370, type=int, 121 | required=False, help='How often in mini-batches prediction function should get validated.') 122 | args = parser.parse_args() 123 | 124 | l 
= LSTMController(max_mb=args.max_mb, 125 | patience=args.patience, 126 | valid_freq=args.valid_freq, 127 | default_args=Controller.default_arguments(args)) 128 | 129 | print("Controller is ready") 130 | return l.serve() 131 | 132 | if __name__ == '__main__': 133 | rcode = lstm_control() 134 | if rcode != 0: 135 | sys.exit(rcode) 136 | -------------------------------------------------------------------------------- /example/lstm/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import gzip 4 | 5 | from six.moves import cPickle 6 | 7 | import numpy 8 | import theano 9 | 10 | 11 | def prepare_data(seqs, labels, maxlen=None): 12 | """Create the matrices from the datasets. 13 | 14 | This pad each sequence to the same lenght: the lenght of the 15 | longuest sequence or maxlen. 16 | 17 | if maxlen is set, we will cut all sequence to this maximum 18 | lenght. 19 | 20 | This swap the axis! 21 | """ 22 | # x: a list of sentences 23 | lengths = [len(s) for s in seqs] 24 | 25 | if maxlen is not None: 26 | new_seqs = [] 27 | new_labels = [] 28 | new_lengths = [] 29 | for l, s, y in zip(lengths, seqs, labels): 30 | if l < maxlen: 31 | new_seqs.append(s) 32 | new_labels.append(y) 33 | new_lengths.append(l) 34 | lengths = new_lengths 35 | labels = new_labels 36 | seqs = new_seqs 37 | 38 | if len(lengths) < 1: 39 | return None, None, None 40 | 41 | n_samples = len(seqs) 42 | maxlen = numpy.max(lengths) 43 | 44 | x = numpy.zeros((maxlen, n_samples)).astype('int64') 45 | x_mask = numpy.zeros((maxlen, n_samples)).astype(theano.config.floatX) 46 | for idx, s in enumerate(seqs): 47 | x[:lengths[idx], idx] = s 48 | x_mask[:lengths[idx], idx] = 1. 49 | 50 | return x, x_mask, labels 51 | 52 | 53 | def get_dataset_file(dataset, default_dataset, origin): 54 | '''Look for it as if it was a full path, if not, try local file, 55 | if not try in the data directory. 56 | 57 | Download dataset if it is not present 58 | 59 | ''' 60 | data_dir, data_file = os.path.split(dataset) 61 | if data_dir == "" and not os.path.isfile(dataset): 62 | # Check if dataset is in the data directory. 63 | new_path = os.path.join( 64 | os.path.split(__file__)[0], 65 | "..", 66 | "data", 67 | dataset 68 | ) 69 | if os.path.isfile(new_path) or data_file == default_dataset: 70 | dataset = new_path 71 | 72 | if (not os.path.isfile(dataset)) and data_file == default_dataset: 73 | from six.moves import urllib 74 | print('Downloading data from %s' % origin) 75 | urllib.request.urlretrieve(origin, dataset) 76 | return dataset 77 | 78 | 79 | def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None, 80 | sort_by_len=True): 81 | '''Loads the dataset 82 | 83 | :type path: String 84 | :param path: The path to the dataset (here IMDB) 85 | :type n_words: int 86 | :param n_words: The number of word to keep in the vocabulary. 87 | All extra words are set to unknow (1). 88 | :type valid_portion: float 89 | :param valid_portion: The proportion of the full train set used for 90 | the validation set. 91 | :type maxlen: None or positive int 92 | :param maxlen: the max sequence length we use in the train/valid set. 93 | :type sort_by_len: bool 94 | :name sort_by_len: Sort by the sequence lenght for the train, 95 | valid and test set. This allow faster execution as it cause 96 | less padding per minibatch. Another mechanism must be used to 97 | shuffle the train set at each epoch. 
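    Illustrative call pattern (argument values are placeholders, not taken
    from the original tutorial):

        train, valid, test = load_data(n_words=10000, maxlen=100)
        x, x_mask, y = prepare_data(train[0][:16], train[1][:16])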
98 | 99 | ''' 100 | 101 | ############# 102 | # LOAD DATA # 103 | ############# 104 | 105 | # Load the dataset 106 | path = get_dataset_file( 107 | path, "imdb.pkl", 108 | "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl") 109 | 110 | if path.endswith(".gz"): 111 | f = gzip.open(path, 'rb') 112 | else: 113 | f = open(path, 'rb') 114 | 115 | train_set = cPickle.load(f) 116 | test_set = cPickle.load(f) 117 | f.close() 118 | if maxlen: 119 | new_train_set_x = [] 120 | new_train_set_y = [] 121 | for x, y in zip(train_set[0], train_set[1]): 122 | if len(x) < maxlen: 123 | new_train_set_x.append(x) 124 | new_train_set_y.append(y) 125 | train_set = (new_train_set_x, new_train_set_y) 126 | del new_train_set_x, new_train_set_y 127 | 128 | # split training set into validation set 129 | train_set_x, train_set_y = train_set 130 | n_samples = len(train_set_x) 131 | sidx = numpy.random.permutation(n_samples) 132 | n_train = int(numpy.round(n_samples * (1. - valid_portion))) 133 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 134 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 135 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 136 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 137 | 138 | train_set = (train_set_x, train_set_y) 139 | valid_set = (valid_set_x, valid_set_y) 140 | 141 | def remove_unk(x): 142 | return [[1 if w >= n_words else w for w in sen] for sen in x] 143 | 144 | test_set_x, test_set_y = test_set 145 | valid_set_x, valid_set_y = valid_set 146 | train_set_x, train_set_y = train_set 147 | 148 | train_set_x = remove_unk(train_set_x) 149 | valid_set_x = remove_unk(valid_set_x) 150 | test_set_x = remove_unk(test_set_x) 151 | 152 | def len_argsort(seq): 153 | return sorted(range(len(seq)), key=lambda x: len(seq[x])) 154 | 155 | if sort_by_len: 156 | sorted_index = len_argsort(test_set_x) 157 | test_set_x = [test_set_x[i] for i in sorted_index] 158 | test_set_y = [test_set_y[i] for i in sorted_index] 159 | 160 | sorted_index = len_argsort(valid_set_x) 161 | valid_set_x = [valid_set_x[i] for i in sorted_index] 162 | valid_set_y = [valid_set_y[i] for i in sorted_index] 163 | 164 | sorted_index = len_argsort(train_set_x) 165 | train_set_x = [train_set_x[i] for i in sorted_index] 166 | train_set_y = [train_set_y[i] for i in sorted_index] 167 | 168 | train = (train_set_x, train_set_y) 169 | valid = (valid_set_x, valid_set_y) 170 | test = (test_set_x, test_set_y) 171 | 172 | return train, valid, test 173 | -------------------------------------------------------------------------------- /example/synchronous_lstm/imdb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import gzip 4 | 5 | from six.moves import cPickle 6 | 7 | import numpy 8 | import theano 9 | 10 | 11 | def prepare_data(seqs, labels, maxlen=None): 12 | """Create the matrices from the datasets. 13 | 14 | This pad each sequence to the same lenght: the lenght of the 15 | longuest sequence or maxlen. 16 | 17 | if maxlen is set, we will cut all sequence to this maximum 18 | lenght. 19 | 20 | This swap the axis! 
21 | """ 22 | # x: a list of sentences 23 | lengths = [len(s) for s in seqs] 24 | 25 | if maxlen is not None: 26 | new_seqs = [] 27 | new_labels = [] 28 | new_lengths = [] 29 | for l, s, y in zip(lengths, seqs, labels): 30 | if l < maxlen: 31 | new_seqs.append(s) 32 | new_labels.append(y) 33 | new_lengths.append(l) 34 | lengths = new_lengths 35 | labels = new_labels 36 | seqs = new_seqs 37 | 38 | if len(lengths) < 1: 39 | return None, None, None 40 | 41 | n_samples = len(seqs) 42 | maxlen = numpy.max(lengths) 43 | 44 | x = numpy.zeros((maxlen, n_samples)).astype('int64') 45 | x_mask = numpy.zeros((maxlen, n_samples)).astype(theano.config.floatX) 46 | for idx, s in enumerate(seqs): 47 | x[:lengths[idx], idx] = s 48 | x_mask[:lengths[idx], idx] = 1. 49 | 50 | return x, x_mask, labels 51 | 52 | 53 | def get_dataset_file(dataset, default_dataset, origin): 54 | '''Look for it as if it was a full path, if not, try local file, 55 | if not try in the data directory. 56 | 57 | Download dataset if it is not present 58 | 59 | ''' 60 | data_dir, data_file = os.path.split(dataset) 61 | if data_dir == "" and not os.path.isfile(dataset): 62 | # Check if dataset is in the data directory. 63 | new_path = os.path.join( 64 | os.path.split(__file__)[0], 65 | "..", 66 | "data", 67 | dataset 68 | ) 69 | if os.path.isfile(new_path) or data_file == default_dataset: 70 | dataset = new_path 71 | 72 | if (not os.path.isfile(dataset)) and data_file == default_dataset: 73 | from six.moves import urllib 74 | print('Downloading data from %s' % origin) 75 | urllib.request.urlretrieve(origin, dataset) 76 | return dataset 77 | 78 | 79 | def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None, 80 | sort_by_len=True): 81 | '''Loads the dataset 82 | 83 | :type path: String 84 | :param path: The path to the dataset (here IMDB) 85 | :type n_words: int 86 | :param n_words: The number of word to keep in the vocabulary. 87 | All extra words are set to unknow (1). 88 | :type valid_portion: float 89 | :param valid_portion: The proportion of the full train set used for 90 | the validation set. 91 | :type maxlen: None or positive int 92 | :param maxlen: the max sequence length we use in the train/valid set. 93 | :type sort_by_len: bool 94 | :name sort_by_len: Sort by the sequence lenght for the train, 95 | valid and test set. This allow faster execution as it cause 96 | less padding per minibatch. Another mechanism must be used to 97 | shuffle the train set at each epoch. 98 | 99 | ''' 100 | 101 | ############# 102 | # LOAD DATA # 103 | ############# 104 | 105 | # Load the dataset 106 | path = get_dataset_file( 107 | path, "imdb.pkl", 108 | "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl") 109 | 110 | if path.endswith(".gz"): 111 | f = gzip.open(path, 'rb') 112 | else: 113 | f = open(path, 'rb') 114 | 115 | train_set = cPickle.load(f) 116 | test_set = cPickle.load(f) 117 | f.close() 118 | if maxlen: 119 | new_train_set_x = [] 120 | new_train_set_y = [] 121 | for x, y in zip(train_set[0], train_set[1]): 122 | if len(x) < maxlen: 123 | new_train_set_x.append(x) 124 | new_train_set_y.append(y) 125 | train_set = (new_train_set_x, new_train_set_y) 126 | del new_train_set_x, new_train_set_y 127 | 128 | # split training set into validation set 129 | train_set_x, train_set_y = train_set 130 | n_samples = len(train_set_x) 131 | sidx = numpy.random.permutation(n_samples) 132 | n_train = int(numpy.round(n_samples * (1. 
- valid_portion))) 133 | valid_set_x = [train_set_x[s] for s in sidx[n_train:]] 134 | valid_set_y = [train_set_y[s] for s in sidx[n_train:]] 135 | train_set_x = [train_set_x[s] for s in sidx[:n_train]] 136 | train_set_y = [train_set_y[s] for s in sidx[:n_train]] 137 | 138 | train_set = (train_set_x, train_set_y) 139 | valid_set = (valid_set_x, valid_set_y) 140 | 141 | def remove_unk(x): 142 | return [[1 if w >= n_words else w for w in sen] for sen in x] 143 | 144 | test_set_x, test_set_y = test_set 145 | valid_set_x, valid_set_y = valid_set 146 | train_set_x, train_set_y = train_set 147 | 148 | train_set_x = remove_unk(train_set_x) 149 | valid_set_x = remove_unk(valid_set_x) 150 | test_set_x = remove_unk(test_set_x) 151 | 152 | def len_argsort(seq): 153 | return sorted(range(len(seq)), key=lambda x: len(seq[x])) 154 | 155 | if sort_by_len: 156 | sorted_index = len_argsort(test_set_x) 157 | test_set_x = [test_set_x[i] for i in sorted_index] 158 | test_set_y = [test_set_y[i] for i in sorted_index] 159 | 160 | sorted_index = len_argsort(valid_set_x) 161 | valid_set_x = [valid_set_x[i] for i in sorted_index] 162 | valid_set_y = [valid_set_y[i] for i in sorted_index] 163 | 164 | sorted_index = len_argsort(train_set_x) 165 | train_set_x = [train_set_x[i] for i in sorted_index] 166 | train_set_y = [train_set_y[i] for i in sorted_index] 167 | 168 | train = (train_set_x, train_set_y) 169 | valid = (valid_set_x, valid_set_y) 170 | test = (test_set_x, test_set_y) 171 | 172 | return train, valid, test 173 | -------------------------------------------------------------------------------- /platoon/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`util` -- Common utility functions for Platoon's classes 4 | ============================================================= 5 | 6 | .. module:: util 7 | :platform: Unix 8 | :synopsis: Contains PlatoonException classes and various helpers. 9 | 10 | """ 11 | from __future__ import print_function 12 | import os 13 | import sys 14 | import subprocess 15 | import cffi 16 | 17 | import numpy as np 18 | try: 19 | from mpi4py import MPI 20 | except ImportError: 21 | MPI = None 22 | 23 | 24 | class PlatoonException(Exception): 25 | """Exception used for abnormal behaviour related to Platoon. 26 | 27 | Useful for logging and managing error. 28 | 29 | """ 30 | def __init__(self, severity, descr, from_exc=None): 31 | self.severity = severity 32 | self.descr = descr 33 | self.from_exc = from_exc 34 | 35 | def __str__(self): 36 | d = str(self.severity) + "! " + str(self.descr) 37 | if self.from_exc is not None: 38 | d += "\nReason: " + str(self.from_exc) 39 | return d 40 | 41 | 42 | class PlatoonError(PlatoonException): 43 | """ 44 | Exception used for errors related to Platoon. 45 | """ 46 | def __init__(self, descr, from_exc=None): 47 | super(PlatoonError, self).__init__("ERROR", descr, from_exc) 48 | 49 | 50 | class PlatoonWarning(PlatoonException): 51 | """ 52 | Exception used for warnings related to Platoon. 53 | """ 54 | def __init__(self, descr, from_exc=None): 55 | super(PlatoonWarning, self).__init__("WARNING", descr, from_exc) 56 | 57 | 58 | def mmap(length=0, prot=0x3, flags=0x1, fd=0, offset=0): 59 | """ 60 | Map file descriptor or shared memory buffer to virtual address space of this 61 | process and create an object with Python buffer interface for that address. 
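    The default `prot`/`flags` values correspond to PROT_READ|PROT_WRITE and
    MAP_SHARED on Linux. An illustrative call, mapping a POSIX shared-memory
    file descriptor `shm_fd` of `size` bytes (both names are hypothetical):

        buf = mmap(length=size, fd=shm_fd)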
62 | """ 63 | _ffi = cffi.FFI() 64 | _ffi.cdef("void *mmap(void *, size_t, int, int, int, size_t);") 65 | _lib = _ffi.dlopen(None) 66 | 67 | addr = _ffi.NULL 68 | 69 | m = _lib.mmap(addr, length, prot, flags, fd, offset) 70 | if m == _ffi.cast('void *', -1): 71 | raise OSError(_ffi.errno, "for mmap") 72 | return _ffi.buffer(m, length) 73 | 74 | 75 | def launch_process(logs_folder, experiment_name, args, device, 76 | process_type="worker"): 77 | """ 78 | Helper function for a Platoon subprocess. 79 | """ 80 | print("## Starting {0} on {1} ...".format(process_type, device), end=' ') 81 | 82 | log_file = os.path.join(logs_folder, "{0}_{1}.{{}}".format(process_type, device)) 83 | with open(log_file.format("out"), 'w') as stdout_file: 84 | with open(log_file.format("err"), 'w') as stderr_file: 85 | env = dict(os.environ) 86 | env['THEANO_FLAGS'] = '{},device={}'.format(env.get('THEANO_FLAGS', ''), device) 87 | if experiment_name == "platoon" and process_type == "controller": 88 | executable = ["-m", "platoon.channel.controller"] 89 | else: 90 | executable = ["{0}_{1}.py".format(experiment_name, process_type)] 91 | command = [sys.executable, "-u"] + executable 92 | if args: 93 | command += args 94 | process = subprocess.Popen(command, bufsize=0, stdout=stdout_file, stderr=stderr_file, env=env) 95 | 96 | print("Done") 97 | return process 98 | 99 | if MPI: 100 | GA_TO_MPI_OP = { 101 | '+': MPI.SUM, 102 | "sum": MPI.SUM, 103 | "add": MPI.SUM, 104 | '*': MPI.PROD, 105 | "prod": MPI.PROD, 106 | "product": MPI.PROD, 107 | "mul": MPI.PROD, 108 | "max": MPI.MAX, 109 | "maximum": MPI.MAX, 110 | "min": MPI.MIN, 111 | "minimum": MPI.MIN, 112 | } 113 | 114 | NP_TO_MPI_TYPE = { 115 | np.dtype('bool'): MPI.C_BOOL, 116 | np.dtype('int8'): MPI.INT8_T, 117 | np.dtype('uint8'): MPI.UINT8_T, 118 | np.dtype('int16'): MPI.INT16_T, 119 | np.dtype('uint16'): MPI.UINT16_T, 120 | np.dtype('int32'): MPI.INT32_T, 121 | np.dtype('uint32'): MPI.UINT32_T, 122 | np.dtype('int64'): MPI.INT64_T, 123 | np.dtype('uint64'): MPI.UINT64_T, 124 | np.dtype('float32'): MPI.FLOAT, 125 | np.dtype('float64'): MPI.DOUBLE, 126 | np.dtype('complex64'): MPI.C_FLOAT_COMPLEX, 127 | np.dtype('complex128'): MPI.C_DOUBLE_COMPLEX, 128 | # TODO How to handle half types in MPI? 129 | # np.dtype('float16'): MPI.HALF, 130 | } 131 | 132 | 133 | def op_to_mpi(op): 134 | """ 135 | Converts pygpu collective reduce operation types to MPI reduce operation 136 | types. 137 | """ 138 | if MPI is None: 139 | raise AttributeError("mpi4py is not imported") 140 | res = GA_TO_MPI_OP.get(op.lower()) 141 | if res is not None: 142 | return res 143 | raise ValueError("Invalid reduce operation: {}".format(str(op))) 144 | 145 | 146 | def dtype_to_mpi(dtype): 147 | """ 148 | Converts numpy datatypes to MPI datatypes. 149 | """ 150 | if MPI is None: 151 | raise AttributeError("mpi4py is not imported") 152 | res = NP_TO_MPI_TYPE.get(np.dtype(dtype)) 153 | if res is not None: 154 | return res 155 | raise TypeError("Conversion from dtype {} is not known".format(dtype)) 156 | 157 | 158 | class SingletonType(type): 159 | """ 160 | Metaclass that implements the singleton pattern for a Python class. 
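    Illustrative behaviour (the class name is hypothetical; ``six.with_metaclass``
    is used here only for 2/3 compatibility):

        class Foo(six.with_metaclass(SingletonType, object)):
            pass

        a = Foo()
        b = Foo(1, 2)   # prints a PlatoonWarning: already initialized
        assert a is b   # both names refer to the same instance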
161 | """ 162 | def __init__(cls, name, bases, dict): 163 | super(SingletonType, cls).__init__(name, bases, dict) 164 | cls.instance = None 165 | 166 | def __call__(cls, *args, **kwds): 167 | if cls.instance is None: 168 | cls.args = args 169 | cls.kwds = kwds 170 | cls.instance = super(SingletonType, cls).__call__(*args, **kwds) 171 | else: 172 | if args or kwds: 173 | print(PlatoonWarning("Worker instance has already been initialized." 174 | "\nArgs: {0}, Kwds: {1}".format(args, kwds)), 175 | file=sys.stderr) 176 | return cls.instance 177 | -------------------------------------------------------------------------------- /platoon/tests/unit/test_controller.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import six 3 | import unittest 4 | 5 | from ...channel import controller 6 | 7 | if six.PY3: 8 | buffer_ = memoryview 9 | else: 10 | buffer_ = buffer # noqa 11 | 12 | 13 | class TestController(unittest.TestCase): 14 | @classmethod 15 | def setUpClass(cls): 16 | cls.local_size = 3 17 | cls.devices = ["cuda0", "cuda1", "cuda2"] 18 | cls.control = controller.Controller(5567, devices=cls.devices) 19 | 20 | @classmethod 21 | def tearDownClass(cls): 22 | cls.control._close() 23 | 24 | def test_is_worker_first(self): 25 | first = self.control._is_worker_first(self.control._am_i_first_count) 26 | assert first 27 | first = self.control._is_worker_first(self.control._am_i_first_count) 28 | assert not first 29 | first = self.control._is_worker_first(self.control._am_i_first_count) 30 | assert not first 31 | first = self.control._is_worker_first(self.control._am_i_first_count) 32 | assert first 33 | first = self.control._is_worker_first(self.control._am_i_first_count) 34 | assert not first 35 | first = self.control._is_worker_first(self.control._am_i_first_count) 36 | assert not first 37 | 38 | def test_get_platoon_info(self): 39 | req_info = {} 40 | 41 | req_info['local_id'] = '1' 42 | req_info['device'] = 'cuda0' 43 | res = self.control._get_platoon_info(req_info) 44 | assert set(res.keys()) == set(['local_id', 'local_size', 'local_rank', 'multinode', 'global_size', 'global_rank']) 45 | assert res['local_id'] == "platoon-1" 46 | assert res['local_size'] == self.local_size 47 | assert res['local_rank'] == 0 48 | assert not res['multinode'] 49 | assert res['global_size'] == self.local_size 50 | 51 | req_info['local_id'] = '2' 52 | req_info['device'] = 'cuda1' 53 | res = self.control._get_platoon_info(req_info) 54 | assert set(res.keys()) == set(['local_id', 'local_size', 'local_rank', 'multinode', 'global_size', 'global_rank']) 55 | assert res['local_id'] == "platoon-1" 56 | assert res['local_size'] == self.local_size 57 | assert res['local_rank'] == 1 58 | assert not res['multinode'] 59 | assert res['global_size'] == self.local_size 60 | 61 | req_info['local_id'] = '3' 62 | req_info['device'] = 'cuda2' 63 | res = self.control._get_platoon_info(req_info) 64 | assert set(res.keys()) == set(['local_id', 'local_size', 'local_rank', 'multinode', 'global_size', 'global_rank']) 65 | assert res['local_id'] == "platoon-1" 66 | assert res['local_size'] == self.local_size 67 | assert res['local_rank'] == 2 68 | assert not res['multinode'] 69 | assert res['global_size'] == self.local_size 70 | 71 | req_info['local_id'] = 'asdfasfda' 72 | req_info['device'] = 'cuda1' 73 | res = self.control._get_platoon_info(req_info) 74 | assert set(res.keys()) == set(['local_id', 'local_size', 'local_rank', 'multinode', 'global_size', 
'global_rank']) 75 | assert res['local_id'] == "platoon-asdfasfda" 76 | assert res['local_size'] == self.local_size 77 | assert res['local_rank'] == 1 78 | assert not res['multinode'] 79 | assert res['global_size'] == self.local_size 80 | 81 | def test_init_new_shmem(self): 82 | self.control._job_uid = "yo" 83 | req_info = {'size': 64} 84 | 85 | res = self.control._init_new_shmem(req_info) 86 | assert res == "platoon-yo_0_buffer" 87 | assert len(self.control.shared_buffers) == 1 88 | assert len(self.control._shmrefs) == 1 89 | assert self.control._last_shmem_name == "platoon-yo_0_buffer" 90 | a = self.control.shared_buffers[res] 91 | try: 92 | buffer_(a) 93 | except TypeError: 94 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(0)) 95 | assert len(a) == 64 96 | 97 | res = self.control._init_new_shmem(req_info) 98 | assert res == "platoon-yo_0_buffer" 99 | assert len(self.control.shared_buffers) == 1 100 | assert len(self.control._shmrefs) == 1 101 | assert self.control._last_shmem_name == "platoon-yo_0_buffer" 102 | b = self.control.shared_buffers[res] 103 | try: 104 | buffer_(b) 105 | except TypeError: 106 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(0)) 107 | assert len(b) == 64 108 | assert b == a 109 | 110 | res = self.control._init_new_shmem(req_info) 111 | assert res == "platoon-yo_0_buffer" 112 | assert len(self.control.shared_buffers) == 1 113 | assert len(self.control._shmrefs) == 1 114 | assert self.control._last_shmem_name == "platoon-yo_0_buffer" 115 | c = self.control.shared_buffers[res] 116 | try: 117 | buffer_(c) 118 | except TypeError: 119 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(0)) 120 | assert len(c) == 64 121 | assert c == a 122 | 123 | req_info = {'size': 512} 124 | res = self.control._init_new_shmem(req_info) 125 | assert res == "platoon-yo_1_buffer" 126 | assert len(self.control.shared_buffers) == 2 127 | assert len(self.control._shmrefs) == 2 128 | assert self.control._last_shmem_name == "platoon-yo_1_buffer" 129 | e = self.control.shared_buffers[res] 130 | try: 131 | buffer_(e) 132 | except TypeError: 133 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(1)) 134 | assert len(e) == 512 135 | assert e != c 136 | 137 | res = self.control._init_new_shmem(req_info) 138 | assert res == "platoon-yo_1_buffer" 139 | assert len(self.control.shared_buffers) == 2 140 | assert len(self.control._shmrefs) == 2 141 | assert self.control._last_shmem_name == "platoon-yo_1_buffer" 142 | f = self.control.shared_buffers[res] 143 | try: 144 | buffer_(f) 145 | except TypeError: 146 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(1)) 147 | assert len(f) == 512 148 | assert f != c 149 | assert f == e 150 | 151 | res = self.control._init_new_shmem(req_info) 152 | assert res == "platoon-yo_1_buffer" 153 | assert len(self.control.shared_buffers) == 2 154 | assert len(self.control._shmrefs) == 2 155 | assert self.control._last_shmem_name == "platoon-yo_1_buffer" 156 | g = self.control.shared_buffers[res] 157 | try: 158 | buffer_(g) 159 | except TypeError: 160 | self.fail("self.control.shared_buffers[{}] does not provide buffer interface.".format(1)) 161 | assert len(g) == 512 162 | assert g != c 163 | assert g == e 164 | -------------------------------------------------------------------------------- /example/synchronous_lstm/lstm_controller.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import os 3 | import sys 4 | import time 5 | 6 | import numpy 7 | 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 9 | from platoon.channel import Controller 10 | 11 | 12 | class LSTMController(Controller): 13 | """ 14 | This multi-process controller implements patience-based early-stopping SGD 15 | """ 16 | 17 | def __init__(self, seed, patience, default_args): 18 | """ 19 | Initialize the LSTMController 20 | 21 | Parameters 22 | ---------- 23 | max_mb : int 24 | Max number of minibatches to train on. 25 | patience: : int 26 | Training stops when this many minibatches have been trained on 27 | without any reported improvement. 28 | valid_freq : int 29 | Number of minibatches to train on between every monitoring step. 30 | default_args : dict 31 | Arguments of default class Controller 32 | """ 33 | super(LSTMController, self).__init__(**default_args) 34 | self.nb_worker = len(self._devices) 35 | # map ids to members of range(nb_worker) 36 | self.worker_ids_dict = dict(zip(self._workers, [i for i in range(len(self._workers))])) 37 | 38 | self.patience = patience 39 | self.seed = seed 40 | 41 | self.valid_history_errs = [[None for i in range(self.nb_worker)]] 42 | self.test_history_errs = [[None for i in range(self.nb_worker)]] 43 | self.bad_counter = 0 44 | self._epoch = 0 45 | self.best_dict = dict(best__epoch=-1, best_valid=numpy.inf) 46 | 47 | 48 | def handle_control(self, req, worker_id, req_info): 49 | """ 50 | Handles a control_request received from a worker 51 | 52 | Parameters 53 | ---------- 54 | req : str or dict 55 | Control request received from a worker. 56 | The control request can be one of the following 57 | 1) "next" : request by a worker to be informed of its next action 58 | to perform. The answers from the server can be 'train' (the 59 | worker should keep training on its training data), 'valid' (the 60 | worker should perform monitoring on its validation set and test 61 | set) or 'stop' (the worker should stop training). 62 | 2) dict of format {"done":N} : used by a worker to inform the 63 | server that is has performed N more training iterations and 64 | synced its parameters. The server will respond 'stop' if the 65 | maximum number of training minibatches has been reached. 66 | 3) dict of format {"valid_err":x, "test_err":x2} : used by a worker 67 | to inform the server that it has performed a monitoring step 68 | and obtained the included errors on the monitoring datasets. 69 | The server will respond "best" if this is the best reported 70 | validation error so far, otherwise it will respond 'stop' if 71 | the patience has been exceeded. 
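        Note: the description above mirrors the asynchronous LSTM example. As
        implemented below, this synchronous controller actually serves:
        1) 'pred_errors' with {'valid_err': x, 'test_err': x2, 'epoch': e};
           once every worker has reported for the current epoch, the response
           may be 'best'.
        2) 'splits' with {'train_len': n, 'valid_len': m, 'test_len': k}; the
           response is a dict of per-worker [start, end) index ranges.
        3) 'seed'; the response is the common random seed.
        Whenever the patience is exceeded, the response becomes 'stop'.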
72 | """ 73 | control_response = "" 74 | worker_id = self.worker_ids_dict[worker_id] 75 | 76 | if req == 'pred_errors': 77 | if self.valid_history_errs[self._epoch][worker_id] is not None: 78 | # if a worker tries to add a valid error where there is no None 79 | # it means it tries to index after or before current _epoch 80 | raise RuntimeError('Worker got out of synch!') 81 | self.valid_history_errs[self._epoch][worker_id] = req_info['valid_err'] 82 | self.test_history_errs[self._epoch][worker_id] = req_info['test_err'] 83 | 84 | if not any([i is None for i in self.valid_history_errs[self._epoch]]): 85 | print('Epoch %d is done'%req_info['epoch']) 86 | valid_err = sum(self.valid_history_errs[self._epoch]) / float(self.nb_worker) 87 | 88 | if valid_err <= self.best_dict['best_valid']: 89 | self.best_dict['best_epoch'] = self._epoch 90 | self.best_dict['best_valid'] = valid_err 91 | self.bad_counter = 0 92 | control_response = 'best' 93 | print("Best error valid:", valid_err) 94 | else: 95 | self.bad_counter += 1 96 | self.valid_history_errs += [[None for i in range(self.nb_worker)]] 97 | self.test_history_errs += [[None for i in range(self.nb_worker)]] 98 | self._epoch += 1 99 | 100 | elif req == 'splits': 101 | # the controller never loads the dataset but the worker doesn't 102 | # know how many workers there are 103 | train_len = req_info['train_len'] // self.nb_worker 104 | valid_len = req_info['valid_len'] // self.nb_worker 105 | test_len = req_info['test_len'] // self.nb_worker 106 | splits = dict(train_splits=[train_len * worker_id, train_len * (worker_id + 1)], 107 | valid_splits=[valid_len * worker_id, valid_len * (worker_id + 1)], 108 | test_splits=[test_len * worker_id, test_len * (worker_id + 1)]) 109 | control_response = splits 110 | 111 | # kind of when the training start but not really 112 | self.start_time = time.time() 113 | 114 | elif req == 'seed': 115 | control_response = self.seed 116 | 117 | if self.bad_counter > self.patience: 118 | print("Early stopping!") 119 | end_time = time.time() - self.start_time 120 | # should terminate with best printing and best dumping of params 121 | # and then close everything 122 | print("Best error valid:", self.best_dict['best_valid']) 123 | test_err = sum(self.test_history_errs[self.best_dict['best_epoch']]) / \ 124 | float(self.nb_worker) 125 | print("Best error test:", test_err) 126 | print( ("Training took %.1fs" % (end_time)), file=sys.stderr) 127 | control_response = 'stop' 128 | 129 | return control_response 130 | 131 | 132 | def lstm_control(saveFreq=1110, saveto=None): 133 | parser = Controller.default_parser() 134 | parser.add_argument('--seed', default=1234, type=int, 135 | required=False, help='Maximum mini-batches to train upon in total.') 136 | parser.add_argument('--patience', default=10, type=int, required=False, 137 | help='Maximum patience when failing to get better validation results.') 138 | args = parser.parse_args() 139 | 140 | l = LSTMController(seed=args.seed, 141 | patience=args.patience, 142 | default_args=Controller.default_arguments(args)) 143 | 144 | print("Controller is ready") 145 | return l.serve() 146 | 147 | if __name__ == '__main__': 148 | rcode = lstm_control() 149 | if rcode != 0: 150 | sys.exit(rcode) 151 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # platoon 2 | Experimental multi-GPU mini-framework for Theano 3 | 4 | It supports **data-parallelism** inside one 
compute node, not 5 | model-parallelism. For model-parallelism, see the [Theano multiple GPUs 6 | tutorial](http://deeplearning.net/software/theano/tutorial/using_multi_gpu.html). 7 | 8 | In Platoon, there are two main components: workers and controllers. 9 | Workers do the bulk of the work (training, monitoring, ...). Controllers 10 | interact with multiple workers to coordinate their work, collect the results 11 | and decide how to act on them. To use Platoon, you will need to write code which 12 | uses a worker. You can also extend the functionality of a worker or a controller by 13 | implementing your own. Platoon provides helper classes to 14 | facilitate this. 15 | 16 | This framework is under development. Its interface is not polished and it is 17 | likely to undergo changes in the future. 18 | 19 | The framework provides two separate worker interfaces that allow the user to implement 20 | multiple data-parallel algorithms: *param_sync* and *all_reduce*. The default interface 21 | is *param_sync*. Installing the optional dependencies listed in the features table below 22 | will make the *all_reduce* interface available as well. 23 | 24 | Interface | sync type | multi-node | Theano Ops | extra dependencies 25 | ----------|-----------|-----------------------------|------------|-------------------- 26 | param_sync| sync/async| no | no | no 27 | all_reduce| sync only | yes (if [mpi4py](https://github.com/mpi4py/mpi4py) is installed)| yes | [NCCL](https://github.com/NVIDIA/nccl), [pygpu](https://github.com/Theano/libgpuarray), [Theano](https://github.com/Theano/Theano) 28 | 29 | There are currently two algorithms for distributed gradient descent implemented with 30 | the *param_sync* interface and three with the *all_reduce* interface. 31 | 32 | * **param_sync**: [EASGD](http://arxiv.org/abs/1412.6651) and ASGD. 33 | * **all_reduce**: Synchronous sum/average SGD, EASGD and a synchronous variant of [Downpour](http://research.google.com/archive/large_deep_networks_nips2012.html). 34 | 35 | There are working examples in the `example` directory. 36 | 37 | The steps below describe what needs to be done to use Platoon for 38 | data-parallelism. The LSTM example in the `example` folder was implemented 39 | following these steps and should be referred to for guidance. 40 | 41 | 42 | ## Install 43 | You can simply install it using pip: 44 | `pip install git+https://github.com/mila-udem/platoon` 45 | 46 | 47 | If you would like to use the examples or help develop Platoon, first clone the repo: 48 | 49 | `git clone https://github.com/mila-udem/platoon` 50 | 51 | Then install what you just cloned: 52 | 53 | `pip install -e <path-to-cloned-repo>` 54 | 55 | 56 | ## Usage 57 | The simplest way to launch a multi-GPU experiment is to first implement a controller and a worker as described below and then launch them using `platoon-launcher`. A controller file is not required if you only want 58 | to use the existing controller functionality. 59 | 60 | The launcher assumes that the two files are named `<experiment_name>_controller.py` and `<experiment_name>_worker.py`. 61 | 62 | To launch the experiment, you only need to specify the experiment name and the GPUs you want to use: 63 | 64 | `platoon-launcher <experiment_name> -D gpu0 gpu1` 65 | 66 | You can also omit the `-D` argument and let the launcher find all available CUDA GPUs to use 67 | in the single-node experiment: 68 | 69 | `platoon-launcher <experiment_name>` 70 | 71 | For more configuration options, see `platoon-launcher -h`.
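Before the step-by-step instructions, here is a minimal, illustrative controller/worker pair. It is only a sketch of the call pattern used by the bundled LSTM example, not a drop-in script: `build_model()` and `train_some_minibatches()` are placeholders for your own code, the 'next'/'train'/'stop' protocol is deliberately simplified, the worker-side request helper is assumed here to be named `send_req`, and the exact arguments of `init_shared_params()`/`sync_params()` should be checked against the LSTM worker.

```python
# <experiment_name>_controller.py -- sketch only
import sys
from platoon.channel import Controller


class MyController(Controller):
    """Replies 'train' a fixed number of times, then 'stop'."""

    def __init__(self, max_steps, default_args):
        super(MyController, self).__init__(**default_args)
        self.max_steps = max_steps
        self.steps = 0

    def handle_control(self, req, worker_id, req_info):
        # `req` is whatever a worker sent; reply with its next action.
        if req == 'next':
            self.steps += 1
            return 'train' if self.steps <= self.max_steps else 'stop'
        return ""


if __name__ == '__main__':
    parser = Controller.default_parser()
    parser.add_argument('--max-steps', type=int, default=1000)
    args = parser.parse_args()
    controller = MyController(args.max_steps,
                              default_args=Controller.default_arguments(args))
    sys.exit(controller.serve())
```

The matching worker repeatedly asks the controller what to do, trains, and keeps its parameters in sync:

```python
# <experiment_name>_worker.py -- sketch only
from platoon.channel import Worker

worker = Worker(control_port=5567)    # same control port as the controller
params = build_model()                # placeholder: your shared parameters
worker.init_shared_params(params)     # schematic; see the LSTM worker for
                                      # the exact arguments (e.g. a sync rule)

while True:
    step = worker.send_req('next')    # assumed name of the request helper
    if step == 'stop':
        break
    train_some_minibatches()          # placeholder: your training code
    # ... report progress to the controller following its protocol ...
    worker.sync_params()              # synchronize with the central parameters
worker.close()
```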
72 | 73 | 74 | ### Implementing a controller 75 | These steps describe how to implement the Python script that will launch 76 | your controller. In the included LSTM example, both of these steps are done 77 | in the file `lstm_controller.py`. 78 | 79 | 1) Define which commands your controller can receive and how it responds to 80 | them. Commands starting with "platoon-" are reserved by Platoon. 81 | 82 | This is done by creating a new class that inherits from `channel.Controller` 83 | and having it override the method `handle_control()`, which will be called 84 | whenever your controller receives a request from a worker. 85 | 86 | 2) Instantiate and launch your custom controller. 87 | 88 | Create a script that will instantiate your custom controller. Once this is 89 | done, define the port on which the controller should listen by calling the 90 | function `init_control`. Finally, call your controller's `serve` method, which 91 | makes it ready to receive requests from workers. 92 | 93 | ### Implementing the workers 94 | These steps describe how to start with a script that performs stand-alone 95 | training of a machine learning model and adapt it to serve as a worker in 96 | Platoon. 97 | 98 | 1) Add a new parameter to the script which will be used during execution to 99 | know whether the worker is the first one to be launched and should create the 100 | central parameters or not. 101 | 102 | 2) Before entering the main loop, the script must create an instance of the 103 | class `channel.Worker`, providing it with the same port number as used to 104 | initialize the controller. It is not necessary to sub-class Worker; you can 105 | instantiate it directly. This object provides the necessary methods to 106 | handle communication with the controller. 107 | 108 | 3) After the model has been built and the parameters initialized, 109 | initialize the central parameters by calling the Worker's 110 | `init_shared_params()` method. Every worker should call this method. 111 | 112 | 4) In the main loop, instead of deciding when to train and when to monitor 113 | performance, the worker should send control requests to the controller to know 114 | what action it should take, according to the communication protocol 115 | established in the controller's `handle_control()` method. 116 | 117 | 5) In the main loop, whenever the worker has performed `N` (a hyper-parameter) 118 | iterations of training, it should synchronize its parameters with the central 119 | parameters using its Worker's `sync_params()` method. 120 | 121 | 122 | ### Real usage considerations 123 | The optimal (as in most efficient for learning) hyper-parameter values 124 | depend on the number of workers. At a minimum, consider tuning the 125 | learning rate and the alpha parameter of EASGD. 126 | 127 | How to choose the alpha hyper-parameter isn't clear. An alpha of 0.5 128 | for the LSTM example with 2 workers seems to give good training 129 | efficiency for this model/dataset/hyper-parameter combination. 130 | 131 | Using alpha = 1/N (with N being the number of workers) might be a 132 | reasonable guideline, but the experiments performed with Platoon are 133 | insufficient to conclude anything. 134 | 135 | In the EASGD paper it is shown that in some cases a larger number of 136 | workers can result in a better test error. 137 | 138 | ## Examples 139 | For the *param sync* interface, see the `example/lstm/` [folder](https://github.com/mila-udem/platoon/tree/master/example/lstm).
140 | 141 | For *all reduce* interface, see `example/synchronous_lstm/` [folder](https://github.com/mila-udem/platoon/tree/master/example/synchronous_lstm). 142 | 143 | -------------------------------------------------------------------------------- /platoon/tests/functional/test_ops_worker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | import os 3 | import sys 4 | 5 | import unittest 6 | 7 | import theano 8 | from theano import config 9 | import numpy as np 10 | 11 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..')) 12 | from platoon import Worker 13 | from platoon import ops 14 | 15 | 16 | class TestOpsWorker(unittest.TestCase): 17 | @classmethod 18 | def setUpClass(cls): 19 | try: 20 | cls.total_nw = int(os.environ['PLATOON_TEST_WORKERS_NUM']) 21 | cls.worker = Worker(control_port=5567) 22 | except Exception as exc: 23 | print(exc, file=sys.stderr) 24 | raise exc 25 | 26 | def setUp(self): 27 | super(TestOpsWorker, self).setUp() 28 | SEED = 567 29 | np.random.seed(SEED) 30 | self.inp = 30 * np.random.random((100, 400, 200)).astype(config.floatX) 31 | self.sinp = theano.shared(self.inp) 32 | self.out = np.empty_like(self.inp) 33 | self.sout = theano.shared(self.out) 34 | 35 | def test_all_reduce_sum(self): 36 | res = ops.AllReduceSum(self.sinp) 37 | f = theano.function([], [], updates=[(self.sout, res)], 38 | profile=True) 39 | expected = self.total_nw * self.inp 40 | f() 41 | actual = self.sout.get_value() 42 | assert np.allclose(expected, actual) 43 | 44 | # This is faster, because it runs inplace! 45 | res = ops.AllReduceSum(self.sinp, self.sout) 46 | f = theano.function([], [], updates=[(self.sout, res)], 47 | accept_inplace=True, profile=True) 48 | expected = self.total_nw * self.inp 49 | f() 50 | actual = self.sout.get_value() 51 | assert np.allclose(expected, actual) 52 | 53 | x = theano.tensor.scalar(dtype=config.floatX) 54 | res = ops.AllReduceSum(self.sinp, self.sout) 55 | f = theano.function([x], [], updates=[(self.sout, res / x)], 56 | accept_inplace=True, profile=True) 57 | expected = self.total_nw * self.inp / 2 58 | f(2) 59 | actual = self.sout.get_value() 60 | assert np.allclose(expected, actual) 61 | expected = self.total_nw * self.inp / 3.14159 62 | f(3.14159) 63 | actual = self.sout.get_value() 64 | assert np.allclose(expected, actual) 65 | 66 | x = theano.tensor.scalar(dtype=config.floatX) 67 | self.sinp *= x 68 | res = ops.AllReduceSum(self.sinp, self.sout) 69 | f = theano.function([x], [], updates=[(self.sout, res)], 70 | accept_inplace=True, profile=True) 71 | expected = self.total_nw * self.inp * 2 72 | f(2) 73 | actual = self.sout.get_value() 74 | assert np.allclose(expected, actual) 75 | expected = self.total_nw * self.inp * 3.14159 76 | f(3.14159) 77 | actual = self.sout.get_value() 78 | assert np.allclose(expected, actual) 79 | 80 | def test_all_reduce_sum_inplace(self): 81 | res = ops.AllReduceSum(self.sinp, inplace=True) 82 | f = theano.function([], [], updates=[(self.sinp, res)], 83 | accept_inplace=True, profile=True) 84 | expected = self.total_nw * self.inp 85 | f() 86 | actual = self.sinp.get_value() 87 | assert np.allclose(expected, actual) 88 | 89 | def test_all_reduce_prod(self): 90 | res = ops.AllReduceProd(self.sinp) 91 | f = theano.function([], [], updates=[(self.sout, res)], 92 | profile=True) 93 | expected = self.inp ** self.total_nw 94 | f() 95 | actual = self.sout.get_value() 96 | assert np.allclose(expected, 
actual) 97 | 98 | # This is faster, because it runs inplace! 99 | res = ops.AllReduceProd(self.sinp, self.sout) 100 | f = theano.function([], [], updates=[(self.sout, res)], 101 | accept_inplace=True, profile=True) 102 | expected = self.inp ** self.total_nw 103 | f() 104 | actual = self.sout.get_value() 105 | assert np.allclose(expected, actual) 106 | 107 | def test_all_reduce_prod_inplace(self): 108 | res = ops.AllReduceProd(self.sinp, inplace=True) 109 | f = theano.function([], [], updates=[(self.sinp, res)], 110 | accept_inplace=True, profile=True) 111 | expected = self.inp ** self.total_nw 112 | f() 113 | actual = self.sinp.get_value() 114 | assert np.allclose(expected, actual) 115 | 116 | def test_all_reduce_maximum(self): 117 | res = ops.AllReduceMax(self.sinp) 118 | f = theano.function([], [], updates=[(self.sout, res)], 119 | profile=True) 120 | expected = self.inp 121 | f() 122 | actual = self.sout.get_value() 123 | assert np.allclose(expected, actual) 124 | 125 | # This is faster, because it runs inplace! 126 | res = ops.AllReduceMax(self.sinp, self.sout) 127 | f = theano.function([], [], updates=[(self.sout, res)], 128 | accept_inplace=True, profile=True) 129 | expected = self.inp 130 | f() 131 | actual = self.sout.get_value() 132 | assert np.allclose(expected, actual) 133 | 134 | def test_all_reduce_max_inplace(self): 135 | res = ops.AllReduceMax(self.sinp, inplace=True) 136 | f = theano.function([], [], updates=[(self.sinp, res)], 137 | accept_inplace=True, profile=True) 138 | expected = self.inp 139 | f() 140 | actual = self.sinp.get_value() 141 | assert np.allclose(expected, actual) 142 | 143 | def test_all_reduce_minimum(self): 144 | res = ops.AllReduceMin(self.sinp) 145 | f = theano.function([], [], updates=[(self.sout, res)], 146 | profile=True) 147 | expected = self.inp 148 | f() 149 | actual = self.sout.get_value() 150 | assert np.allclose(expected, actual) 151 | 152 | # This is faster, because it runs inplace! 153 | res = ops.AllReduceMin(self.sinp, self.sout) 154 | f = theano.function([], [], updates=[(self.sout, res)], 155 | accept_inplace=True, profile=True) 156 | expected = self.inp 157 | f() 158 | actual = self.sout.get_value() 159 | assert np.allclose(expected, actual) 160 | 161 | def test_all_reduce_min_inplace(self): 162 | res = ops.AllReduceMin(self.sinp, inplace=True) 163 | f = theano.function([], [], updates=[(self.sinp, res)], 164 | accept_inplace=True, profile=True) 165 | expected = self.inp 166 | f() 167 | actual = self.sinp.get_value() 168 | assert np.allclose(expected, actual) 169 | 170 | def test_on_diferent_types(self): 171 | tmp = np.empty_like(self.inp, dtype='int32') 172 | stmp = theano.shared(tmp) 173 | self.assertRaises(TypeError, ops.AllReduceSum, self.sinp, stmp) 174 | 175 | @classmethod 176 | def tearDownClass(cls): 177 | cls.worker.close() 178 | 179 | if __name__ == '__main__': 180 | suite = unittest.TestLoader().loadTestsFromTestCase(TestOpsWorker) 181 | res = unittest.TextTestRunner(verbosity=1).run(suite) 182 | if len(res.failures) != 0 or len(res.errors) != 0: 183 | sys.exit(1) 184 | -------------------------------------------------------------------------------- /platoon/ops.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`ops` -- Theano Ops for Worker interface 4 | ============================================= 5 | 6 | .. module:: ops 7 | :platform: Unix 8 | :synopsis: Contains AllReduce Theano Op and builder function for each 9 | reduce operation type. 
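Example usage, adapted from the functional tests (a ``platoon.Worker``
must already exist in the calling process):

    res = AllReduceSum(sinp, sout)   # sinp, sout: GPU shared variables
    fn = theano.function([], [], updates=[(sout, res)], accept_inplace=True)
    fn()                             # sout now holds the sum over all workers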
10 | 11 | """ 12 | from __future__ import absolute_import, print_function 13 | import sys 14 | 15 | from six.moves import xrange 16 | 17 | try: 18 | import theano 19 | from theano.gradient import grad_not_implemented 20 | from theano.gpuarray.basic_ops import as_gpuarray_variable 21 | except ImportError as exc: 22 | print("ERROR! On {}:".format(__name__), exc, file=sys.stderr) 23 | theano = None 24 | 25 | from .channel.worker import Worker 26 | 27 | 28 | if theano: 29 | class AllReduce(theano.Op): 30 | """Wrapper of :class:`channel.worker.Worker`. 31 | 32 | For full documentation, see builder functions: 33 | * :func:`AllReduceSum` 34 | * :func:`AllReduceProd` 35 | * :func:`AllReduceMax` 36 | * :func:`AllReduceMin` 37 | 38 | :param scalar_op: Representation of collective reduce operation type. 39 | :type scalar_op: {str, :ref:`theano.scalar.add`, :ref:`theano.scalar.mul`, 40 | :ref:`theano.scalar.maximum`, :ref:`theano.scalar.minimum`} 41 | 42 | .. seealso:: module :mod:`channel.worker` 43 | 44 | .. versionadded:: 0.6.0 45 | 46 | """ 47 | __props__ = ("scalar_op", ) 48 | 49 | def __init__(self, scalar_op, inplace=False, worker=None): 50 | if worker is not None: 51 | if isinstance(worker, Worker): 52 | self.worker = worker 53 | else: 54 | raise TypeError("Argument `worker` is not of platoon.Worker type.") 55 | else: 56 | try: 57 | self.worker = Worker() # Get singleton instance 58 | except TypeError: 59 | raise AttributeError("Worker instance has not been created yet.") 60 | # This is because I have not found a way to use half-types through MPI 61 | self._f16_ok = not self.worker._multinode 62 | self.scalar_op = scalar_op 63 | self.inplace = inplace 64 | 65 | def __str__(self): 66 | if self.inplace: 67 | return "AllReduce{%s,inplace}" % (str(self.scalar_op).capitalize()) 68 | else: 69 | return "AllReduce{%s,no_inplace}" % (str(self.scalar_op).capitalize()) 70 | 71 | def make_node(self, src, dest=None): 72 | if dest is None: 73 | inputs = [src] 74 | if self.inplace: 75 | self.inplace_pattern = {0: 0} 76 | else: 77 | self.inplace_pattern = {} 78 | else: 79 | inputs = [src, dest] 80 | self.inplace = True 81 | self.inplace_pattern = {0: 1} 82 | self.destroy_map = dict((o, [i]) for o, i in self.inplace_pattern.items()) 83 | inputs = [as_gpuarray_variable(i, self.worker.ctx_name) for i in inputs] 84 | if dest is not None: 85 | if not inputs[0].type == inputs[1].type: 86 | raise TypeError("`src` and `dest` must have the same Type:", 87 | (inputs[0].type, inputs[1].type)) 88 | out_type = inputs[0].type.clone() 89 | return theano.Apply(self, inputs, [out_type()]) 90 | 91 | def infer_shapes(self, node, shapes): 92 | return [shapes[0]] 93 | 94 | def perform(self, node, inputs, outputs): 95 | out = outputs[0] 96 | src = inputs[0] 97 | if len(node.inputs) == 2: # If inplace op 98 | dest = inputs[1] 99 | self.worker.all_reduce(src, str(self.scalar_op), dest) 100 | out[0] = dest 101 | elif self.inplace: 102 | self.worker.all_reduce(src, str(self.scalar_op), src) 103 | out[0] = src 104 | else: 105 | out[0] = self.worker.all_reduce(src, str(self.scalar_op)) 106 | 107 | def grad(self, inputs, ograds): 108 | return [grad_not_implemented(self, i, inputs[i]) for i in xrange(len(inputs))] 109 | 110 | def AllReduceSum(src, dest=None, inplace=False, worker=None): 111 | """ 112 | Element-wise sum of `src` GPU tensor across all 113 | Platoon worker processes. 114 | 115 | Parameters 116 | ---------- 117 | src : GPU tensor (array-like) 118 | Input array. 
119 | dest : GPU tensor (array-like), optional 120 | Output array. If None (default) is given, then an GPU array-like 121 | will be returned with result, which has the same shape and datatype 122 | as `src`. 123 | inplace : bool, optional 124 | If True, then operation will happen inplace and the result will be 125 | written in array `src`. 126 | worker : :class:`channel.worker.Worker`, optional 127 | Platoon Worker instance unique to a single process which will be used 128 | to execute the operation. If None (default) is given, the singleton 129 | instance will be used. 130 | 131 | Returns 132 | ------- 133 | result : GPU tensor (array-like) 134 | Result array will be `dest` if it was specified in the arguments, 135 | `src` if `inplace` is True, else a new variable which points to 136 | operation's result. 137 | 138 | Notes 139 | ----- 140 | * If `dest` is given, then the Op is inplace in Theano sense. 141 | * If a `worker` is not given, then a Worker instance must have been 142 | already instantiated. 143 | 144 | Raises 145 | ------ 146 | TypeError 147 | If `worker` specified is not of type :class:`channel.worker.Worker` 148 | or if `src` and `dest` are not of the same Theano Type. 149 | AttributeError 150 | If singleton Worker has not been instantiated yet. 151 | 152 | .. versionadded:: 0.6.0 153 | 154 | """ 155 | return AllReduce(theano.scalar.add, inplace, worker)(src, dest) 156 | 157 | def AllReduceProd(src, dest=None, inplace=False, worker=None): 158 | """ 159 | Element-wise multiplication of `src` GPU tensor across all 160 | Platoon worker processes. 161 | 162 | .. seealso:: 163 | Function :func:`AllReduceSum` 164 | For documentation on parameters, return variables, notes and 165 | raises. 166 | 167 | .. versionadded:: 0.6.0 168 | 169 | """ 170 | return AllReduce(theano.scalar.mul, inplace, worker)(src, dest) 171 | 172 | def AllReduceMax(src, dest=None, inplace=False, worker=None): 173 | """ 174 | Find element-wise maximum of `src` GPU tensor across all 175 | Platoon worker processes. 176 | 177 | .. seealso:: 178 | Function :func:`AllReduceSum` 179 | For documentation on parameters, return variables, notes and 180 | raises. 181 | 182 | .. versionadded:: 0.6.0 183 | 184 | """ 185 | return AllReduce(theano.scalar.maximum, inplace, worker)(src, dest) 186 | 187 | def AllReduceMin(src, dest=None, inplace=False, worker=None): 188 | """ 189 | Find element-wise minimum of `src` GPU tensor across all 190 | Platoon worker processes. 191 | 192 | .. seealso:: 193 | Function :func:`AllReduceSum` 194 | For documentation on parameters, return variables, notes and 195 | raises. 196 | 197 | .. versionadded:: 0.6.0 198 | 199 | """ 200 | return AllReduce(theano.scalar.minimum, inplace, worker)(src, dest) 201 | else: 202 | AllReduce = None 203 | AllReduceSum = None 204 | AllReduceProd = None 205 | AllReduceMax = None 206 | AllReduceMin = None 207 | -------------------------------------------------------------------------------- /scripts/platoon-launcher: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | platoon-launcher 5 | 6 | This file serves as an executable for launching a training procedure with 7 | Platoon. Depending on the given arguments or configuration, the training will 8 | start in a single machine or multiple hosts. Execute `platoon-launcher -h` to 9 | see instructions or read the docs. 
10 | 11 | Exit Codes 12 | ---------- 13 | 0: Success 14 | 1: A worker or controller has exited with non-success status 15 | 2: False arguments 16 | 3: Subprocess or OS errors 17 | 4: Other error 18 | 19 | """ 20 | 21 | from __future__ import print_function 22 | import os 23 | import sys 24 | import subprocess 25 | import signal 26 | import time 27 | import shlex 28 | import argparse 29 | import textwrap 30 | 31 | from platoon.util import launch_process 32 | from platoon import configparser 33 | 34 | 35 | def parse_arguments(): 36 | parser = argparse.ArgumentParser( 37 | formatter_class=argparse.RawDescriptionHelpFormatter, 38 | description=textwrap.dedent('''\ 39 | ################################################################################ 40 | # Launcher for Platoon multi-GPU/node training framework # 41 | ################################################################################ 42 | Platoon will train your Theano models using multiple GPUs even if they do not 43 | reside in the same host. 44 | 45 | In order to use it, a worker file needs to be provided. A worker file defines 46 | the training process of a single set of model parameters in a parallel and 47 | distributed manner. Optionally and in case you want to extend the distributed 48 | computation capabilities of the training process, you are encouraged to provide 49 | a controller file which extends the default one (`platoon.controller` module) in 50 | this framework. 51 | 52 | Platoon is configured through the command-line arguments of this launcher and in 53 | case of their absence (or if it needed) through environmental variables or 54 | Platoon configuration files. Please read `platoonrc.conf` in package's root 55 | directory to learn about every way that Platoon can be configured. 56 | 57 | If single-node is explicitly specified through command-line arguments, the 58 | specified devices will be used in the GPU communicator world in the order they 59 | are parsed. The same thing applies also for lists of devices found in Platoon 60 | environmentals or configuration files. 61 | 62 | e.g. usage: platoon-launcher lstm -D cuda0 cuda3 (explicit config) 63 | platoon-launcher lstm (config with envs/files - may be multi-node) 64 | 65 | If multi-node is explicitly specified through command-line arguments, extra 66 | configuration through appropriate environmentals per host or files needs to be 67 | done in order to describe which devices will be used in each host. Host names 68 | are given the same way they are given in MPI's `mpirun`. 69 | 70 | e.g. usage: platoon-launcher lstm -H lisa0 lisa1 71 | (gpus on lisa0 and gpus on lisa1) 72 | 73 | NOTIFICATION: This launcher is used to set up the new worker interface (the old 74 | is still usable - but not in multi-node configs). The new worker interface 75 | supports only CUDA devices currently. NVIDIA's "NCCL" collectives library and 76 | "pygpu" are required for multi-GPU, while "mpi4py" is required in addition for 77 | multi-node.''')) 78 | parser.add_argument('experiment_name', help='The name of your experiment. The launcher will expect to find the files _worker.py and optionally _controller.py.') 79 | single_or_multi = parser.add_mutually_exclusive_group(required=False) 80 | single_or_multi.add_argument('-D', '--devices', nargs='+', type=str, metavar='devname', 81 | required=False, help='List of Theano device names (e.g. gpu0 or cuda1). Each device will be assigned to a separate worker. 
If this option is specified, experiment will be run in a single node.') 82 | single_or_multi.add_argument('-H', '--hosts', nargs='+', type=str, metavar='hostname', 83 | required=False, help='List of host names to participate in multi-node training. Each host will be assigned to a separate controller. If this option is specified, experiment will be run in multiple nodes.') 84 | parser.add_argument('-c', '--controller-args', required=False, help='The arguments that will be passed to your controllers. (Ex: -c="--sync_rule EASGD")') 85 | parser.add_argument('-w', '--worker-args', required=False, help='The arguments that will be passed to your workers. (Ex: -w="learning_rate=0.1")') 86 | 87 | return parser.parse_args() 88 | 89 | if __name__ == '__main__': 90 | args = parse_arguments() 91 | 92 | logs_folder = os.path.join("PLATOON_LOGS", args.experiment_name, time.strftime("%Y-%m-%d_%H-%M-%S")) 93 | os.makedirs(logs_folder) 94 | 95 | print("### Launching experiment: {}".format(args.experiment_name)) 96 | 97 | # check for worker executable, else fail 98 | if not os.path.isfile("./{}_worker.py".format(args.experiment_name)): 99 | print("\nERROR! Cannot find worker executable: {}_worker.py".format(args.experiment_name)) 100 | sys.exit(2) 101 | # check for custom controller executable, else use default 102 | if os.path.isfile("./{}_controller.py".format(args.experiment_name)): 103 | controller_type = args.experiment_name 104 | else: 105 | controller_type = "platoon" 106 | 107 | # If not specified in launcher, check for other configuration types 108 | if args.hosts is None: 109 | try: 110 | hosts = configparser.fetch_hosts() 111 | except KeyError: 112 | hosts = None 113 | else: 114 | hosts = args.hosts 115 | 116 | # Check if we run on multi-node 117 | if hosts and len(hosts) > 1: 118 | print("### Starting multi-node/gpu training on: {} ...".format(' '.join(hosts)), end=' ') 119 | log_file = os.path.join(logs_folder, "multi-node-controllers.{}") 120 | env = dict(os.environ) 121 | theano_flags = "THEANO_FLAGS={0},device={1}".format(env.pop('THEANO_FLAGS', ''), "cpu") 122 | command = ["mpirun"] 123 | command += ["--output-filename", log_file.format("log")] 124 | command += ["-mca", "mpi_warn_on_fork", "0"] 125 | command += ["-np", str(len(hosts))] 126 | command += ["-H", ','.join(hosts)] 127 | command += ["--map-by", "ppr:1:node"] 128 | command += shlex.split("-x " + " -x ".join(env.keys()) + " -x " + theano_flags) 129 | if controller_type == "platoon": 130 | executable = ["-m", "platoon.channel.controller"] 131 | else: 132 | executable = ["{}_controller.py".format(controller_type)] 133 | command += [sys.executable, "-u"] + executable 134 | command += [args.experiment_name, logs_folder, "--multi"] 135 | if args.controller_args: 136 | command += shlex.split(args.controller_args) 137 | if args.worker_args: 138 | command += ["-w", args.worker_args] 139 | try: 140 | p = subprocess.Popen(command) 141 | except OSError as exc: 142 | print("\nERROR! OS error in Popen: {}".format(exc)) 143 | sys.exit(3) 144 | except Exception as exc: 145 | print("\nERROR! Other in Popen: {}".format(exc)) 146 | sys.exit(4) 147 | print("Done") 148 | experiment_type = "Multi-node Controllers" 149 | else: 150 | print("### Starting single-node multi-gpu training") 151 | if hosts: 152 | import socket 153 | hostname = socket.gethostname() 154 | if hosts[0] != hostname: 155 | print("\nERROR! 
A single host '{0}' was specified which is not " 156 | "the same as the current host '{1}'.\nThis is not currently " 157 | "supported.".format(hosts[0], hostname)) 158 | sys.exit(2) 159 | controller_args = [args.experiment_name, logs_folder, '--single'] 160 | if args.devices: 161 | controller_args += ['-D'] 162 | controller_args += args.devices 163 | if args.controller_args: 164 | controller_args += shlex.split(args.controller_args) 165 | if args.worker_args: 166 | controller_args += ["-w={}".format(args.worker_args)] 167 | try: 168 | p = launch_process(logs_folder, controller_type, controller_args, "cpu", "controller") 169 | except OSError as exc: 170 | print("\nERROR! OS error in Popen: {}".format(exc)) 171 | sys.exit(3) 172 | except Exception as exc: 173 | print("\nERROR! Other while launching process: {}".format(exc)) 174 | sys.exit(4) 175 | experiment_type = "Single-node Controller" 176 | 177 | print("\n### Logs folder ###\n{}".format(logs_folder)) 178 | print("\n### Waiting on experiment to finish ...") 179 | try: 180 | try: 181 | pid, status = os.waitpid(p.pid, 0) 182 | except OSError as exc: 183 | print("\nERROR! OS error: {}".format(exc)) 184 | sys.exit(3) 185 | if pid != p.pid: 186 | print("\nWARNING! Received status for unknown process {}".format(pid)) 187 | sys.exit(3) 188 | if os.WIFEXITED(status): 189 | rcode = os.WEXITSTATUS(status) 190 | print("## {0} terminated with return code: {1}.".format(experiment_type, rcode)) 191 | if rcode != 0: 192 | print("\nERROR! An error has occured.\nSee logs for more info.") 193 | sys.exit(1) 194 | else: 195 | print("\nSUCCESS! Training with Platoon has finished.") 196 | else: 197 | print("\nWARNING! {} changed status but has not exited.".format(experiment_type)) 198 | raise RuntimeError("ERROR! Unexpected controller status change.") 199 | except (RuntimeError, KeyboardInterrupt) as exc: 200 | print(exc) 201 | print("Killing controller processes...") 202 | try: 203 | os.kill(p.pid, signal.SIGTERM) 204 | pid, status = os.waitpid(p.pid, 0) 205 | except OSError as exc: 206 | print("\nERROR! OS error: {}".format(exc)) 207 | sys.exit(3) 208 | -------------------------------------------------------------------------------- /platoon/training/global_dynamics.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | :mod:`training.global_dynamics` -- Collection of global SGD strategies 4 | ====================================================================== 5 | 6 | .. module:: global_dynamics 7 | :platform: Unix 8 | :synopsis: Contains :class:`GlobalDynamics` base class for synchronous 9 | global gradient descents and implementation of various techniques 10 | using Platoon's :class:`channel.worker.Worker`'s 11 | :meth:`channel.worker.Worker.all_reduce` interface. 12 | 13 | Implementations 14 | --------------- 15 | * *:class:`SGD`* : Synchronous variant of Stochastic Gradient Descent for many 16 | descending particles. 17 | * *:class:`EASGD`* : Elastic Averaging Stochastic Gradient Descent (synchronous) 18 | * *:class:`Downpour`* : A synchronous variant of Downpour 19 | 20 | """ 21 | from __future__ import absolute_import, division 22 | 23 | from ..channel.worker import Worker 24 | from ..ops import AllReduceSum 25 | 26 | 27 | class GlobalDynamics(object): 28 | """Abstract class which declares the methods and properties that need to 29 | be implemented by a synchronous global dynamics rule. 
30 | 31 | Parameters 32 | ---------- 33 | worker : :class:`channel.Worker`, optional 34 | A reference to Worker's instance 35 | 36 | .. versionadded:: 0.6.0 37 | 38 | """ 39 | def __init__(self, worker=None): 40 | self._worker = None 41 | if worker is not None: 42 | self.worker = worker 43 | self._fn = None 44 | 45 | def __call__(self): 46 | if self._fn is None: 47 | raise NotImplementedError("Functionality has not been specified.\n" 48 | "Please use {} method to setup GlobalDynamics" 49 | "for a set of Variables\nor supply your own" 50 | "using {} method.".format( 51 | repr(self.make_rule), repr(self.fn))) 52 | self._fn() 53 | 54 | @property 55 | def worker(self): 56 | """Worker class instance used for global operations""" 57 | if self._worker is None: 58 | try: 59 | self._worker = Worker() # Draw singleton instance 60 | except TypeError: 61 | raise AttributeError("Worker instance has not been created yet.") 62 | return self._worker 63 | 64 | @worker.setter 65 | def worker(self, inst): 66 | if not isinstance(inst, Worker): 67 | raise TypeError("Argument `inst` is not of platoon.Worker type.") 68 | self._worker = inst 69 | 70 | def register_fn(self, fun): 71 | """Internal function implementing global dynamics. Does not accept 72 | parameters. Global optimization must be done through shared variables. 73 | 74 | The responsibility for supplying a valid internal function falls to the 75 | user. It must be able to be called like this: ``fun()``. Also in order 76 | to serve its purpose, it needs to have multi-GPU or even multi-node 77 | functionality. As a result, a :class:`channel.Worker` or other interface 78 | need to be used. 79 | 80 | :param fun: Implements global dynamics by using information 81 | from many workers. 82 | :type fun: callable 83 | 84 | """ 85 | if not hasattr(fun, '__call__'): 86 | raise TypeError("Supplied object is not a callable.") 87 | self._fn = fun 88 | 89 | def make_rule(self, *args): 90 | """ 91 | Create :class:`GlobalDynamics` optimization function for 92 | local data in `args`. 93 | 94 | Implementation in a child class must return a callable object which 95 | expects no arguments. User must be careful to create a function which 96 | uses shared objects in order to update local model parameters, such as 97 | Theano Shared Variables. 98 | 99 | Notes 100 | ----- 101 | For better performance, try to batch together in the same 102 | :ref:`theano.compile.SharedVariable` as many model parameter arrays as 103 | possible. This reduces the number of calls and utilizes the most out of 104 | the underlying algorithms. One way to do this is to create one c 105 | contiguous array that contains every set (matrix) of model parameters 106 | along the first dimension. Then in order to use each set separately, 107 | create as many view arrays as the number of sets of model parameters, 108 | i.e. the length of the first dimension. Use the whole array as an input 109 | to the :meth:`make_rule` function! 110 | 111 | """ 112 | raise NotImplementedError(self.make_rule.__doc__) 113 | 114 | 115 | class _GlobalDynamicsNoSet(GlobalDynamics): 116 | def register_fn(self, fun): 117 | raise AttributeError("Cannot set internal function. Use {} method.".format( 118 | repr(self.make_rule))) 119 | 120 | 121 | class SGD(_GlobalDynamicsNoSet): 122 | """Synchronous Stochastic Gradient Descent: 123 | 124 | It sums or averages model parameter updates found separately (and 125 | concurrently) by workers which are training on (different) random 126 | mini-batches of a dataset. 
127 | 128 | Parameters 129 | ---------- 130 | average : bool, optional 131 | If True, it will normalize the summation of model param updates across 132 | all workers with the number of workers participating in optimization. 133 | worker : :class:`channel.Worker` 134 | See :class:`GlobalDynamics`. 135 | 136 | .. versionadded:: 0.6.0 137 | 138 | """ 139 | def __init__(self, average=False, worker=None): 140 | self.average = average 141 | super(SGD, self).__init__(worker) 142 | 143 | def make_rule(self, local_updates): 144 | """Makes global synchronous SGD rule for the parameters in `local_updates`. 145 | 146 | Parameters 147 | ---------- 148 | local_updates : {:ref:`theano.compile.SharedVariable`, 149 | list of :ref:`theano.compile.SharedVariable`} 150 | These variables represent the updates found 151 | by local optimization dynamics on the model's parameters. 152 | 153 | .. seealso:: Notes on :meth:`GlobalDynamics.make_rule` 154 | 155 | """ 156 | import theano 157 | if isinstance(local_updates, theano.compile.SharedVariable): 158 | local_updates = [local_updates] 159 | global_updates = [] 160 | for update in local_updates: 161 | gup = AllReduceSum(update, inplace=True) 162 | if self.average: 163 | gup /= self.worker.global_size 164 | global_updates.append(gup) 165 | self._fn = theano.function([], [], 166 | updates=list(zip(local_updates, global_updates)), 167 | accept_inplace=True) 168 | 169 | 170 | def SumSGD(worker=None): 171 | """Synchronous Stochastic Gradient Descent: summing version 172 | 173 | .. seealso:: Class :class:`SGD` 174 | .. versionadded:: 0.6.0 175 | 176 | """ 177 | return SGD(average=False, worker=worker) 178 | 179 | 180 | def AverageSGD(worker=None): 181 | """Synchronous Stochastic Gradient Descent: averaging version 182 | 183 | .. seealso:: Class :class:`SGD` 184 | .. versionadded:: 0.6.0 185 | 186 | """ 187 | return SGD(average=True, worker=worker) 188 | 189 | 190 | class EASGD(_GlobalDynamicsNoSet): 191 | """Synchronous variant of Elastic Averaging Stochastic Gradient Descent 192 | 193 | This algorithm is described in more details in the following paper: 194 | http://arxiv.org/abs/1412.6651 195 | 196 | .. seealso:: Class :class:`GlobalDynamics` for parameters 197 | .. versionadded:: 0.6.0 198 | 199 | """ 200 | def make_rule(self, local_particle, central_particle, alpha): 201 | """Make EASGD rule. 202 | 203 | According to this rule, every N iterations, a worker synchronizes his 204 | parameters with the master parameters. This is done by moving each set of 205 | parameters toward the other by an amount proportional to the difference 206 | between the individual params (this proportion is parameterized by `alpha`). 207 | 208 | Parameters 209 | ---------- 210 | local_particle : {:ref:`theano.compile.SharedVariable`, 211 | list of :ref:`theano.compile.SharedVariable`} 212 | A particle's position in parameter space doing local SGD. 213 | central_particle : {:ref:`theano.compile.SharedVariable`, 214 | list of :ref:`theano.compile.SharedVariable`} 215 | Central particle's position in parameter space interacting with 216 | local particles. 217 | alpha: scalar 218 | "Elastic" force's coefficient 219 | 220 | .. note:: 221 | If `alpha` == 0 is used, there is no synchronization of the 222 | parameters meaning that each worker is independently training using SGD. 223 | 224 | .. 
seealso:: Notes on :meth:`GlobalDynamics.make_rule` 225 | 226 | """ 227 | import theano 228 | if isinstance(local_particle, theano.compile.SharedVariable): 229 | local_particle = [local_particle] 230 | if isinstance(central_particle, theano.compile.SharedVariable): 231 | central_particle = [central_particle] 232 | self.alpha = alpha 233 | 234 | new_local = [] 235 | new_central = [] 236 | for local_position, central_position in zip(local_particle, central_particle): 237 | distance = local_position - central_position 238 | elastic_force = alpha * distance 239 | # Note: not equivalent to physical force as `elastic_force`:=Δx/Δt 240 | # and not Δp/Δt 241 | local_new_position = local_position - elastic_force 242 | total_elastic_force = AllReduceSum(elastic_force, inplace=True) 243 | central_new_position = central_position + total_elastic_force 244 | 245 | new_local.append(local_new_position) 246 | new_central.append(central_new_position) 247 | 248 | updates = list(zip(local_particle, new_local)) + \ 249 | list(zip(central_particle, new_central)) 250 | self._fn = theano.function([], [], updates=updates, accept_inplace=True) 251 | 252 | 253 | class Downpour(_GlobalDynamicsNoSet): 254 | """Synchronous variant of Downpour distributed optimization technique 255 | 256 | This algorithm is described in details in the following paper: 257 | http://research.google.com/archive/large_deep_networks_nips2012.html 258 | 259 | Parameters 260 | ---------- 261 | average : bool, optional 262 | If True, it will average the sum of locally accumulated parameter updates 263 | in every global update. 264 | worker : :class:`channel.Worker`, optional 265 | See :class:`GlobalDynamics`. 266 | 267 | .. versionadded:: 0.6.0 268 | 269 | """ 270 | def __init__(self, average=False, worker=None): 271 | self.average = average 272 | super(Downpour, self).__init__(worker) 273 | 274 | def make_rule(self, local_particle, local_acc_updates, global_particle): 275 | """Make Downpour rule. 276 | 277 | All particles along with the global particle start from the same 278 | position. According to this rule, each local particle executes descent 279 | normally but their parameter updates are accumulated (e.g. by moving 280 | average) to a variable. Every N iterations, the local accumulated 281 | updates are added together and applied to the global particle. Each 282 | local particle restarts from global particle's position. 283 | 284 | Parameters 285 | ---------- 286 | local_particle : {:ref:`theano.compile.SharedVariable`, 287 | list of :ref:`theano.compile.SharedVariable`} 288 | A particle's position in parameter space doing local SGD. 289 | local_acc_updates : {:ref:`theano.compile.SharedVariable`, 290 | list of :ref:`theano.compile.SharedVariable`} 291 | Shared variable accumulating local parameter updates. 292 | global_particle : {:ref:`theano.compile.SharedVariable`, 293 | list of :ref:`theano.compile.SharedVariable`} 294 | A particle whose position is updated only by the Downpour process and 295 | resets position of local particles. 296 | 297 | .. 
seealso:: Notes on :meth:`GlobalDynamics.make_rule` 298 | 299 | """ 300 | import theano 301 | from theano.tensor import basic 302 | if isinstance(local_particle, theano.compile.SharedVariable): 303 | local_particle = [local_particle] 304 | if isinstance(local_acc_updates, theano.compile.SharedVariable): 305 | local_acc_updates = [local_acc_updates] 306 | if isinstance(global_particle, theano.compile.SharedVariable): 307 | global_particle = [global_particle] 308 | 309 | new_global = [] 310 | new_local = [] 311 | new_acc_updates = [] 312 | for lp, lau, gp in zip(local_particle, local_acc_updates, global_particle): 313 | global_acc_updates = AllReduceSum(lau, inplace=True) 314 | if self.average: 315 | global_acc_updates /= self.worker.global_size 316 | new_global.append(gp + global_acc_updates) 317 | new_local.append(new_global[-1]) 318 | new_acc_updates.append(basic.zeros_like(lau)) 319 | 320 | updates = list(zip(local_particle, new_local)) + \ 321 | list(zip(local_acc_updates, new_acc_updates)) + \ 322 | list(zip(global_particle, new_global)) 323 | 324 | self._fn = theano.function([], [], updates=updates, accept_inplace=True) 325 | -------------------------------------------------------------------------------- /doc/platoon/control_request.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /example/synchronous_lstm/lstm_worker.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build a tweet sentiment analyzer 3 | ''' 4 | from __future__ import absolute_import, print_function 5 | from collections import OrderedDict 6 | import sys 7 | import argparse 8 | 9 | import six 10 | from six import iteritems 11 | from six.moves import range 12 | 13 | import numpy 14 | import theano 15 | from theano import config 16 | import theano.tensor as tensor 17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 18 | 19 | import os 20 | 21 | sys.path.append(os.path.dirname(__file__)) 22 | import imdb 23 | 24 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 25 | from platoon.channel import Worker 26 | from platoon.training.global_dynamics import AverageSGD 27 | 28 | worker = None 29 | datasets = {'imdb': (imdb.load_data, imdb.prepare_data)} 30 | 31 | 32 | def numpy_floatX(data): 33 | return numpy.asarray(data, dtype=config.floatX) 34 | 35 | 36 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 37 | """ 38 | Used to shuffle the dataset at each iteration. 39 | """ 40 | 41 | idx_list = numpy.arange(n, dtype="int32") 42 | 43 | if shuffle: 44 | numpy.random.shuffle(idx_list) 45 | 46 | minibatches = [] 47 | minibatch_start = 0 48 | for i in range(n // minibatch_size): 49 | minibatches.append(idx_list[minibatch_start: 50 | minibatch_start + minibatch_size]) 51 | minibatch_start += minibatch_size 52 | 53 | if (minibatch_start != n): 54 | # Make a minibatch out of what is left 55 | minibatches.append(idx_list[minibatch_start:]) 56 | 57 | return zip(range(len(minibatches)), minibatches) 58 | 59 | 60 | def get_dataset(name): 61 | return datasets[name][0], datasets[name][1] 62 | 63 | 64 | def zipp(params, tparams): 65 | """ 66 | When we reload the model. Needed for the GPU stuff. 67 | """ 68 | for kk, vv in iteritems(params): 69 | tparams[kk].set_value(vv) 70 | 71 | 72 | def unzip(zipped): 73 | """ 74 | When we pickle the model. Needed for the GPU stuff. 
75 | """ 76 | new_params = OrderedDict() 77 | for kk, vv in iteritems(zipped): 78 | new_params[kk] = vv.get_value() 79 | return new_params 80 | 81 | 82 | def dropout_layer(state_before, use_noise, trng): 83 | proj = tensor.switch(use_noise, 84 | (state_before * 85 | trng.binomial(state_before.shape, 86 | p=0.5, n=1, 87 | dtype=state_before.dtype)), 88 | state_before * 0.5) 89 | return proj 90 | 91 | 92 | def _p(pp, name): 93 | return '%s_%s' % (pp, name) 94 | 95 | 96 | def init_params(options): 97 | """ 98 | Global (not LSTM) parameter. For the embeding and the classifier. 99 | """ 100 | params = OrderedDict() 101 | # embedding 102 | randn = numpy.random.rand(options['n_words'], 103 | options['dim_proj']) 104 | params['Wemb'] = (0.01 * randn).astype(config.floatX) 105 | params = get_layer(options['encoder'])[0](options, 106 | params, 107 | prefix=options['encoder']) 108 | # classifier 109 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 110 | options['ydim']).astype(config.floatX) 111 | params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX) 112 | 113 | return params 114 | 115 | 116 | def load_params(path, params): 117 | pp = numpy.load(path) 118 | for kk, vv in iteritems(params): 119 | if kk not in pp: 120 | raise Warning('%s is not in the archive' % kk) 121 | params[kk] = pp[kk] 122 | 123 | return params 124 | 125 | 126 | def init_tparams(params): 127 | tparams = OrderedDict() 128 | for kk, pp in iteritems(params): 129 | tparams[kk] = theano.shared(params[kk], name=kk) 130 | return tparams 131 | 132 | 133 | def get_layer(name): 134 | fns = layers[name] 135 | return fns 136 | 137 | 138 | def ortho_weight(ndim): 139 | W = numpy.random.randn(ndim, ndim) 140 | u, s, v = numpy.linalg.svd(W) 141 | return u.astype(config.floatX) 142 | 143 | 144 | def param_init_lstm(options, params, prefix='lstm'): 145 | """ 146 | Init the LSTM parameter: 147 | 148 | :see: init_params 149 | """ 150 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 151 | ortho_weight(options['dim_proj']), 152 | ortho_weight(options['dim_proj']), 153 | ortho_weight(options['dim_proj'])], axis=1) 154 | params[_p(prefix, 'W')] = W 155 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 156 | ortho_weight(options['dim_proj']), 157 | ortho_weight(options['dim_proj']), 158 | ortho_weight(options['dim_proj'])], axis=1) 159 | params[_p(prefix, 'U')] = U 160 | b = numpy.zeros((4 * options['dim_proj'],)) 161 | params[_p(prefix, 'b')] = b.astype(config.floatX) 162 | 163 | return params 164 | 165 | 166 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 167 | nsteps = state_below.shape[0] 168 | if state_below.ndim == 3: 169 | n_samples = state_below.shape[1] 170 | else: 171 | n_samples = 1 172 | 173 | assert mask is not None 174 | 175 | def _slice(_x, n, dim): 176 | if _x.ndim == 3: 177 | return _x[:, :, n * dim:(n + 1) * dim] 178 | return _x[:, n * dim:(n + 1) * dim] 179 | 180 | def _step(m_, x_, h_, c_): 181 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 182 | preact += x_ 183 | 184 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 185 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 186 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 187 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 188 | 189 | c = f * c_ + i * c 190 | c = m_[:, None] * c + (1. - m_)[:, None] * c_ 191 | 192 | h = o * tensor.tanh(c) 193 | h = m_[:, None] * h + (1. 
- m_)[:, None] * h_ 194 | 195 | return h, c 196 | 197 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 198 | tparams[_p(prefix, 'b')]) 199 | 200 | dim_proj = options['dim_proj'] 201 | rval, updates = theano.scan(_step, 202 | sequences=[mask, state_below], 203 | outputs_info=[tensor.alloc(numpy_floatX(0.), 204 | n_samples, 205 | dim_proj), 206 | tensor.alloc(numpy_floatX(0.), 207 | n_samples, 208 | dim_proj)], 209 | name=_p(prefix, '_layers'), 210 | n_steps=nsteps) 211 | return rval[0] 212 | 213 | 214 | # ff: Feed Forward (normal neural net), only useful to put after lstm 215 | # before the classifier. 216 | layers = {'lstm': (param_init_lstm, lstm_layer)} 217 | 218 | 219 | def sgd(lr, tparams, grads, x, mask, y, cost): 220 | """ Stochastic Gradient Descent 221 | 222 | :note: A more complicated version of sgd then needed. This is 223 | done like that for adadelta and rmsprop. 224 | 225 | """ 226 | # New set of shared variable that will contain the gradient 227 | # for a mini-batch. 228 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 229 | for k, p in iteritems(tparams)] 230 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 231 | 232 | # Function that computes gradients for a mini-batch, but do not 233 | # updates the weights. 234 | f_grad_shared = theano.function([x, mask, y], cost, updates=gsup, 235 | name='sgd_f_grad_shared') 236 | 237 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 238 | 239 | # Function that updates the weights from the previously computed 240 | # gradient. 241 | f_update = theano.function([lr], [], updates=pup, 242 | name='sgd_f_update') 243 | 244 | return f_grad_shared, f_update 245 | 246 | 247 | def adadelta(lr, tparams, grads, x, mask, y, cost): 248 | """ 249 | An adaptive learning rate optimizer 250 | 251 | Parameters 252 | ---------- 253 | lr : Theano SharedVariable 254 | Initial learning rate 255 | tpramas: Theano SharedVariable 256 | Model parameters 257 | grads: Theano variable 258 | Gradients of cost w.r.t to parameres 259 | x: Theano variable 260 | Model inputs 261 | mask: Theano variable 262 | Sequence mask 263 | y: Theano variable 264 | Targets 265 | cost: Theano variable 266 | Objective fucntion to minimize 267 | 268 | Notes 269 | ----- 270 | For more information, see [ADADELTA]_. 271 | 272 | .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning 273 | Rate Method*, arXiv:1212.5701. 
274 | """ 275 | 276 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 277 | name='%s_grad' % k) 278 | for k, p in iteritems(tparams)] 279 | running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), 280 | name='%s_rup2' % k) 281 | for k, p in iteritems(tparams)] 282 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), 283 | name='%s_rgrad2' % k) 284 | for k, p in iteritems(tparams)] 285 | 286 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 287 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 288 | for rg2, g in zip(running_grads2, grads)] 289 | 290 | f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rg2up, 291 | name='adadelta_f_grad_shared') 292 | 293 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 294 | for zg, ru2, rg2 in zip(zipped_grads, 295 | running_up2, 296 | running_grads2)] 297 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 298 | for ru2, ud in zip(running_up2, updir)] 299 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 300 | 301 | f_update = theano.function([lr], [], updates=ru2up + param_up, 302 | on_unused_input='ignore', 303 | name='adadelta_f_update') 304 | 305 | return f_grad_shared, f_update 306 | 307 | 308 | def rmsprop(lr, tparams, grads, x, mask, y, cost): 309 | """ 310 | A variant of SGD that scales the step size by running average of the 311 | recent step norms. 312 | 313 | Parameters 314 | ---------- 315 | lr : Theano SharedVariable 316 | Initial learning rate 317 | tpramas: Theano SharedVariable 318 | Model parameters 319 | grads: Theano variable 320 | Gradients of cost w.r.t to parameres 321 | x: Theano variable 322 | Model inputs 323 | mask: Theano variable 324 | Sequence mask 325 | y: Theano variable 326 | Targets 327 | cost: Theano variable 328 | Objective fucntion to minimize 329 | 330 | Notes 331 | ----- 332 | For more information, see [Hint2014]_. 333 | 334 | .. 
[Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*, 335 | lecture 6a, 336 | http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf 337 | """ 338 | 339 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 340 | name='%s_grad' % k) 341 | for k, p in iteritems(tparams)] 342 | running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 343 | name='%s_rgrad' % k) 344 | for k, p in iteritems(tparams)] 345 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), 346 | name='%s_rgrad2' % k) 347 | for k, p in iteritems(tparams)] 348 | 349 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 350 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 351 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 352 | for rg2, g in zip(running_grads2, grads)] 353 | 354 | f_grad_shared = theano.function([x, mask, y], cost, 355 | updates=zgup + rgup + rg2up, 356 | name='rmsprop_f_grad_shared') 357 | 358 | updir = [theano.shared(p.get_value() * numpy_floatX(0.), 359 | name='%s_updir' % k) 360 | for k, p in iteritems(tparams)] 361 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 362 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 363 | running_grads2)] 364 | param_up = [(p, p + udn[1]) 365 | for p, udn in zip(tparams.values(), updir_new)] 366 | f_update = theano.function([lr], [], updates=updir_new + param_up, 367 | on_unused_input='ignore', 368 | name='rmsprop_f_update') 369 | 370 | return f_grad_shared, f_update 371 | 372 | 373 | def build_model(tparams, options, seed=1234): 374 | print(seed) 375 | trng = RandomStreams(seed) 376 | 377 | # Used for dropout. 378 | use_noise = theano.shared(numpy_floatX(0.)) 379 | 380 | x = tensor.matrix('x', dtype='int64') 381 | mask = tensor.matrix('mask', dtype=config.floatX) 382 | y = tensor.vector('y', dtype='int64') 383 | 384 | n_timesteps = x.shape[0] 385 | n_samples = x.shape[1] 386 | 387 | emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, 388 | n_samples, 389 | options['dim_proj']]) 390 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 391 | prefix=options['encoder'], 392 | mask=mask) 393 | if options['encoder'] == 'lstm': 394 | proj = (proj * mask[:, :, None]).sum(axis=0) 395 | proj = proj / mask.sum(axis=0)[:, None] 396 | if options['use_dropout']: 397 | proj = dropout_layer(proj, use_noise, trng) 398 | 399 | pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b']) 400 | 401 | f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob') 402 | f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred') 403 | 404 | off = 1e-8 405 | if pred.dtype == 'float16': 406 | off = 1e-6 407 | 408 | cost = -tensor.log(pred[tensor.arange(n_samples), y] + off).mean() 409 | 410 | return use_noise, x, mask, y, f_pred_prob, f_pred, cost 411 | 412 | 413 | def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False): 414 | """ If you want to use a trained model, this is useful to compute 415 | the probabilities of new examples. 
416 | """ 417 | n_samples = len(data[0]) 418 | probs = numpy.zeros((n_samples, 2)).astype(config.floatX) 419 | 420 | n_done = 0 421 | 422 | for _, valid_index in iterator: 423 | x, mask, y = prepare_data([data[0][t] for t in valid_index], 424 | numpy.array(data[1])[valid_index], 425 | maxlen=None) 426 | pred_probs = f_pred_prob(x, mask) 427 | probs[valid_index, :] = pred_probs 428 | 429 | n_done += len(valid_index) 430 | if verbose: 431 | print('%d/%d samples classified' % (n_done, n_samples)) 432 | 433 | return probs 434 | 435 | 436 | def pred_error(f_pred, prepare_data, data, iterator, verbose=False): 437 | """ 438 | Just compute the error 439 | f_pred: Theano fct computing the prediction 440 | prepare_data: usual prepare_data for that dataset. 441 | """ 442 | valid_err = 0 443 | for _, valid_index in iterator: 444 | x, mask, y = prepare_data([data[0][t] for t in valid_index], 445 | numpy.array(data[1])[valid_index], 446 | maxlen=None) 447 | preds = f_pred(x, mask) 448 | targets = numpy.array(data[1])[valid_index] 449 | valid_err += (preds == targets).sum() 450 | valid_err = 1. - numpy_floatX(valid_err) / len(data[0]) 451 | 452 | return valid_err 453 | 454 | 455 | def train_lstm( 456 | dim_proj=1024, # word embeding dimension and LSTM number of hidden units. 457 | 458 | decay_c=0., # Weight decay for the classifier applied to the U weights. 459 | lrate=0.0001, # Learning rate for sgd (not used for adadelta and rmsprop) 460 | n_words=10000, # Vocabulary size 461 | optimizer=adadelta, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 462 | encoder='lstm', # TODO: can be removed must be lstm. 463 | saveto='lstm_model.npz', # The best model will be saved there 464 | maxlen=100, # Sequence longer then this get ignored 465 | batch_size=50, # The batch size during training. 466 | #batch_size=100, # This size for a single gpu 467 | valid_batch_size=60, # The batch size used for validation/test set. 468 | #valid_batch_size=120, # This size for a single gpu 469 | validFreq=3, # epoch frequency 470 | dataset='imdb', 471 | 472 | # Parameter for extra option 473 | noise_std=0., 474 | use_dropout=False, # if False slightly faster, but worst test error 475 | # This frequently need a bigger model. 476 | reload_model=None, # Path to a saved model we want to start from. 477 | test_size=-1, # If >0, we keep only this number of test example. 478 | ): 479 | 480 | # Each worker needs the same seed in order to draw the same parameters. 481 | # This will also make them shuffle the batches the same way, but splits are 482 | # different so doesnt matter 483 | seed = worker.send_req('seed') 484 | numpy.random.seed(seed) 485 | 486 | # Model options 487 | model_options = locals().copy() 488 | print("model options", model_options) 489 | 490 | load_data, prepare_data = get_dataset('imdb') 491 | 492 | print('Loading data') 493 | train, valid, test = load_data(n_words=n_words, valid_portion=0.05, 494 | maxlen=maxlen) 495 | if test_size > 0: 496 | # The test set is sorted by size, but we want to keep random 497 | # size example. So we must select a random selection of the 498 | # examples. 499 | idx = numpy.arange(len(test[0])) 500 | numpy.random.shuffle(idx) 501 | idx = idx[:test_size] 502 | test = ([test[0][n] for n in idx], [test[1][n] for n in idx]) 503 | 504 | ydim = numpy.max(train[1]) + 1 505 | 506 | model_options['ydim'] = ydim 507 | 508 | print('Building model') 509 | # This create the initial parameters as numpy ndarrays. 
510 | # Dict name (string) -> numpy ndarray 511 | params = init_params(model_options) 512 | 513 | if reload_model: 514 | load_params('lstm_model.npz', params) 515 | 516 | # This creates Theano Shared Variable from the parameters. 517 | # Dict name (string) -> Theano Tensor Shared Variable 518 | # params and tparams have different copy of the weights. 519 | tparams = init_tparams(params) 520 | 521 | list_tparams = list(tparams.values()) 522 | print("Using all_reduce worker's interface!") 523 | asgd = AverageSGD(worker) 524 | asgd.make_rule(list_tparams) 525 | print("Params init done") 526 | 527 | # use_noise is for dropout 528 | # here we could use a different seed? 529 | (use_noise, x, mask, 530 | y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options, 531 | #seed=seed + worker.global_rank) 532 | seed=seed) 533 | 534 | if decay_c > 0.: 535 | decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c') 536 | weight_decay = 0. 537 | weight_decay += (tparams['U'] ** 2).sum() 538 | weight_decay *= decay_c 539 | cost += weight_decay 540 | 541 | f_cost = theano.function([x, mask, y], cost, name='f_cost') 542 | 543 | grads = tensor.grad(cost, wrt=list_tparams) 544 | f_grad = theano.function([x, mask, y], grads, name='f_grad') 545 | 546 | lr = tensor.scalar(name='lr') 547 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 548 | x, mask, y, cost) 549 | 550 | print('Optimization') 551 | splits = worker.send_req('splits', {'train_len': len(train[0]), 552 | 'valid_len': len(valid[0]), 553 | 'test_len' : len(test[0])}) 554 | 555 | train = numpy.asarray(train) 556 | valid = numpy.asarray(valid) 557 | test = numpy.asarray(test) 558 | train = train[:, splits['train_splits'][0]:splits['train_splits'][1]] 559 | valid = valid[:, splits['valid_splits'][0]:splits['valid_splits'][1]] 560 | test = test[:, splits['test_splits'][0]:splits['test_splits'][1]] 561 | train = train.tolist() 562 | valid = valid.tolist() 563 | test = test.tolist() 564 | 565 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) 566 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) 567 | 568 | def train_iter(): 569 | while True: 570 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 571 | for _, train_index in kf: 572 | y = [train[1][t] for t in train_index] 573 | x = [train[0][t] for t in train_index] 574 | x, mask, y = prepare_data(x, y) 575 | yield x, mask, y 576 | 577 | train_it = train_iter() 578 | nb_train = len(train[0]) // batch_size 579 | 580 | epoch = 0 581 | while True: 582 | use_noise.set_value(numpy_floatX(1.)) 583 | for i in range(nb_train): 584 | x, mask, y = next(train_it) 585 | cost = f_grad_shared(x, mask, y) 586 | f_update(lrate) 587 | asgd() 588 | 589 | print('Train cost:', cost) 590 | 591 | if numpy.mod(epoch, validFreq) == 0: 592 | # do validation 593 | # trick : each worker can do their valid without talking to the controller 594 | # even if they finish before another worker, they will wait in the next 595 | # epoch at the calling of all_reduce when they need to sync again 596 | use_noise.set_value(numpy_floatX(0.)) 597 | valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) 598 | test_err = pred_error(f_pred, prepare_data, test, kf_test) 599 | 600 | # they do need to send the result to the controller 601 | res = worker.send_req('pred_errors', dict(test_err=float(test_err), 602 | valid_err=float(valid_err), epoch=epoch)) 603 | 604 | if res == 'best': 605 | # should save the param at best 606 | pass 607 | 608 | if res == 'stop': 609 | break 610 | 
epoch += 1 611 | 612 | # Release all shared resources. 613 | worker.close() 614 | 615 | 616 | if __name__ == '__main__': 617 | # See function train for all possible parameter and there definition. 618 | parser = Worker.default_parser() 619 | args = parser.parse_args() 620 | 621 | worker = Worker(**Worker.default_arguments(args)) 622 | train_lstm(test_size=500) 623 | -------------------------------------------------------------------------------- /example/lstm/lstm_worker.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Build a tweet sentiment analyzer 3 | ''' 4 | from __future__ import absolute_import, print_function 5 | from collections import OrderedDict 6 | import sys 7 | import argparse 8 | 9 | import six 10 | from six import iteritems 11 | from six.moves import range 12 | 13 | import numpy 14 | import theano 15 | from theano import config 16 | import theano.tensor as tensor 17 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 18 | 19 | import os 20 | 21 | sys.path.append(os.path.dirname(__file__)) 22 | import imdb 23 | 24 | sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 25 | from platoon.channel import Worker 26 | from platoon.param_sync import EASGD 27 | 28 | worker = None 29 | datasets = {'imdb': (imdb.load_data, imdb.prepare_data)} 30 | 31 | 32 | def numpy_floatX(data): 33 | return numpy.asarray(data, dtype=config.floatX) 34 | 35 | 36 | def get_minibatches_idx(n, minibatch_size, shuffle=False): 37 | """ 38 | Used to shuffle the dataset at each iteration. 39 | """ 40 | 41 | idx_list = numpy.arange(n, dtype="int32") 42 | 43 | if shuffle: 44 | numpy.random.shuffle(idx_list) 45 | 46 | minibatches = [] 47 | minibatch_start = 0 48 | for i in range(n // minibatch_size): 49 | minibatches.append(idx_list[minibatch_start: 50 | minibatch_start + minibatch_size]) 51 | minibatch_start += minibatch_size 52 | 53 | if (minibatch_start != n): 54 | # Make a minibatch out of what is left 55 | minibatches.append(idx_list[minibatch_start:]) 56 | 57 | return zip(range(len(minibatches)), minibatches) 58 | 59 | 60 | def get_dataset(name): 61 | return datasets[name][0], datasets[name][1] 62 | 63 | 64 | def zipp(params, tparams): 65 | """ 66 | When we reload the model. Needed for the GPU stuff. 67 | """ 68 | for kk, vv in iteritems(params): 69 | tparams[kk].set_value(vv) 70 | 71 | 72 | def unzip(zipped): 73 | """ 74 | When we pickle the model. Needed for the GPU stuff. 75 | """ 76 | new_params = OrderedDict() 77 | for kk, vv in iteritems(zipped): 78 | new_params[kk] = vv.get_value() 79 | return new_params 80 | 81 | 82 | def dropout_layer(state_before, use_noise, trng): 83 | proj = tensor.switch(use_noise, 84 | (state_before * 85 | trng.binomial(state_before.shape, 86 | p=0.5, n=1, 87 | dtype=state_before.dtype)), 88 | state_before * 0.5) 89 | return proj 90 | 91 | 92 | def _p(pp, name): 93 | return '%s_%s' % (pp, name) 94 | 95 | 96 | def init_params(options): 97 | """ 98 | Global (not LSTM) parameter. For the embeding and the classifier. 
99 | """ 100 | params = OrderedDict() 101 | # embedding 102 | randn = numpy.random.rand(options['n_words'], 103 | options['dim_proj']) 104 | params['Wemb'] = (0.01 * randn).astype(config.floatX) 105 | params = get_layer(options['encoder'])[0](options, 106 | params, 107 | prefix=options['encoder']) 108 | # classifier 109 | params['U'] = 0.01 * numpy.random.randn(options['dim_proj'], 110 | options['ydim']).astype(config.floatX) 111 | params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX) 112 | 113 | return params 114 | 115 | 116 | def load_params(path, params): 117 | pp = numpy.load(path) 118 | for kk, vv in iteritems(params): 119 | if kk not in pp: 120 | raise Warning('%s is not in the archive' % kk) 121 | params[kk] = pp[kk] 122 | 123 | return params 124 | 125 | 126 | def init_tparams(params): 127 | tparams = OrderedDict() 128 | for kk, pp in iteritems(params): 129 | tparams[kk] = theano.shared(params[kk], name=kk) 130 | return tparams 131 | 132 | 133 | def get_layer(name): 134 | fns = layers[name] 135 | return fns 136 | 137 | 138 | def ortho_weight(ndim): 139 | W = numpy.random.randn(ndim, ndim) 140 | u, s, v = numpy.linalg.svd(W) 141 | return u.astype(config.floatX) 142 | 143 | 144 | def param_init_lstm(options, params, prefix='lstm'): 145 | """ 146 | Init the LSTM parameter: 147 | 148 | :see: init_params 149 | """ 150 | W = numpy.concatenate([ortho_weight(options['dim_proj']), 151 | ortho_weight(options['dim_proj']), 152 | ortho_weight(options['dim_proj']), 153 | ortho_weight(options['dim_proj'])], axis=1) 154 | params[_p(prefix, 'W')] = W 155 | U = numpy.concatenate([ortho_weight(options['dim_proj']), 156 | ortho_weight(options['dim_proj']), 157 | ortho_weight(options['dim_proj']), 158 | ortho_weight(options['dim_proj'])], axis=1) 159 | params[_p(prefix, 'U')] = U 160 | b = numpy.zeros((4 * options['dim_proj'],)) 161 | params[_p(prefix, 'b')] = b.astype(config.floatX) 162 | 163 | return params 164 | 165 | 166 | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None): 167 | nsteps = state_below.shape[0] 168 | if state_below.ndim == 3: 169 | n_samples = state_below.shape[1] 170 | else: 171 | n_samples = 1 172 | 173 | assert mask is not None 174 | 175 | def _slice(_x, n, dim): 176 | if _x.ndim == 3: 177 | return _x[:, :, n * dim:(n + 1) * dim] 178 | return _x[:, n * dim:(n + 1) * dim] 179 | 180 | def _step(m_, x_, h_, c_): 181 | preact = tensor.dot(h_, tparams[_p(prefix, 'U')]) 182 | preact += x_ 183 | 184 | i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj'])) 185 | f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj'])) 186 | o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj'])) 187 | c = tensor.tanh(_slice(preact, 3, options['dim_proj'])) 188 | 189 | c = f * c_ + i * c 190 | c = m_[:, None] * c + (1. - m_)[:, None] * c_ 191 | 192 | h = o * tensor.tanh(c) 193 | h = m_[:, None] * h + (1. - m_)[:, None] * h_ 194 | 195 | return h, c 196 | 197 | state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) + 198 | tparams[_p(prefix, 'b')]) 199 | 200 | dim_proj = options['dim_proj'] 201 | rval, updates = theano.scan(_step, 202 | sequences=[mask, state_below], 203 | outputs_info=[tensor.alloc(numpy_floatX(0.), 204 | n_samples, 205 | dim_proj), 206 | tensor.alloc(numpy_floatX(0.), 207 | n_samples, 208 | dim_proj)], 209 | name=_p(prefix, '_layers'), 210 | n_steps=nsteps) 211 | return rval[0] 212 | 213 | 214 | # ff: Feed Forward (normal neural net), only useful to put after lstm 215 | # before the classifier. 
216 | layers = {'lstm': (param_init_lstm, lstm_layer)} 217 | 218 | 219 | def sgd(lr, tparams, grads, x, mask, y, cost): 220 | """ Stochastic Gradient Descent 221 | 222 | :note: A more complicated version of sgd then needed. This is 223 | done like that for adadelta and rmsprop. 224 | 225 | """ 226 | # New set of shared variable that will contain the gradient 227 | # for a mini-batch. 228 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k) 229 | for k, p in iteritems(tparams)] 230 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 231 | 232 | # Function that computes gradients for a mini-batch, but do not 233 | # updates the weights. 234 | f_grad_shared = theano.function([x, mask, y], cost, updates=gsup, 235 | name='sgd_f_grad_shared') 236 | 237 | pup = [(p, p - lr * g) for p, g in zip(tparams.values(), gshared)] 238 | 239 | # Function that updates the weights from the previously computed 240 | # gradient. 241 | f_update = theano.function([lr], [], updates=pup, 242 | name='sgd_f_update') 243 | 244 | return f_grad_shared, f_update 245 | 246 | 247 | def adadelta(lr, tparams, grads, x, mask, y, cost): 248 | """ 249 | An adaptive learning rate optimizer 250 | 251 | Parameters 252 | ---------- 253 | lr : Theano SharedVariable 254 | Initial learning rate 255 | tpramas: Theano SharedVariable 256 | Model parameters 257 | grads: Theano variable 258 | Gradients of cost w.r.t to parameres 259 | x: Theano variable 260 | Model inputs 261 | mask: Theano variable 262 | Sequence mask 263 | y: Theano variable 264 | Targets 265 | cost: Theano variable 266 | Objective fucntion to minimize 267 | 268 | Notes 269 | ----- 270 | For more information, see [ADADELTA]_. 271 | 272 | .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning 273 | Rate Method*, arXiv:1212.5701. 274 | """ 275 | 276 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 277 | name='%s_grad' % k) 278 | for k, p in iteritems(tparams)] 279 | running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), 280 | name='%s_rup2' % k) 281 | for k, p in iteritems(tparams)] 282 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), 283 | name='%s_rgrad2' % k) 284 | for k, p in iteritems(tparams)] 285 | 286 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 287 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 288 | for rg2, g in zip(running_grads2, grads)] 289 | 290 | f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rg2up, 291 | name='adadelta_f_grad_shared') 292 | 293 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg 294 | for zg, ru2, rg2 in zip(zipped_grads, 295 | running_up2, 296 | running_grads2)] 297 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) 298 | for ru2, ud in zip(running_up2, updir)] 299 | param_up = [(p, p + ud) for p, ud in zip(tparams.values(), updir)] 300 | 301 | f_update = theano.function([lr], [], updates=ru2up + param_up, 302 | on_unused_input='ignore', 303 | name='adadelta_f_update') 304 | 305 | return f_grad_shared, f_update 306 | 307 | 308 | def rmsprop(lr, tparams, grads, x, mask, y, cost): 309 | """ 310 | A variant of SGD that scales the step size by running average of the 311 | recent step norms. 
312 | 313 | Parameters 314 | ---------- 315 | lr : Theano SharedVariable 316 | Initial learning rate 317 | tpramas: Theano SharedVariable 318 | Model parameters 319 | grads: Theano variable 320 | Gradients of cost w.r.t to parameres 321 | x: Theano variable 322 | Model inputs 323 | mask: Theano variable 324 | Sequence mask 325 | y: Theano variable 326 | Targets 327 | cost: Theano variable 328 | Objective fucntion to minimize 329 | 330 | Notes 331 | ----- 332 | For more information, see [Hint2014]_. 333 | 334 | .. [Hint2014] Geoff Hinton, *Neural Networks for Machine Learning*, 335 | lecture 6a, 336 | http://cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf 337 | """ 338 | 339 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 340 | name='%s_grad' % k) 341 | for k, p in iteritems(tparams)] 342 | running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), 343 | name='%s_rgrad' % k) 344 | for k, p in iteritems(tparams)] 345 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), 346 | name='%s_rgrad2' % k) 347 | for k, p in iteritems(tparams)] 348 | 349 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 350 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 351 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) 352 | for rg2, g in zip(running_grads2, grads)] 353 | 354 | f_grad_shared = theano.function([x, mask, y], cost, 355 | updates=zgup + rgup + rg2up, 356 | name='rmsprop_f_grad_shared') 357 | 358 | updir = [theano.shared(p.get_value() * numpy_floatX(0.), 359 | name='%s_updir' % k) 360 | for k, p in iteritems(tparams)] 361 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) 362 | for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, 363 | running_grads2)] 364 | param_up = [(p, p + udn[1]) 365 | for p, udn in zip(tparams.values(), updir_new)] 366 | f_update = theano.function([lr], [], updates=updir_new + param_up, 367 | on_unused_input='ignore', 368 | name='rmsprop_f_update') 369 | 370 | return f_grad_shared, f_update 371 | 372 | 373 | def build_model(tparams, options): 374 | trng = RandomStreams(SEED) 375 | 376 | # Used for dropout. 
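    # Descriptive note (added): use_noise is a shared switch set to 1. during training so dropout_layer() samples a binomial mask, and to 0. at evaluation so activations are deterministically halved instead.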
377 | use_noise = theano.shared(numpy_floatX(0.)) 378 | 379 | x = tensor.matrix('x', dtype='int64') 380 | mask = tensor.matrix('mask', dtype=config.floatX) 381 | y = tensor.vector('y', dtype='int64') 382 | 383 | n_timesteps = x.shape[0] 384 | n_samples = x.shape[1] 385 | 386 | emb = tparams['Wemb'][x.flatten()].reshape([n_timesteps, 387 | n_samples, 388 | options['dim_proj']]) 389 | proj = get_layer(options['encoder'])[1](tparams, emb, options, 390 | prefix=options['encoder'], 391 | mask=mask) 392 | if options['encoder'] == 'lstm': 393 | proj = (proj * mask[:, :, None]).sum(axis=0) 394 | proj = proj / mask.sum(axis=0)[:, None] 395 | if options['use_dropout']: 396 | proj = dropout_layer(proj, use_noise, trng) 397 | 398 | pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b']) 399 | 400 | f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob') 401 | f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred') 402 | 403 | off = 1e-8 404 | if pred.dtype == 'float16': 405 | off = 1e-6 406 | 407 | cost = -tensor.log(pred[tensor.arange(n_samples), y] + off).mean() 408 | 409 | return use_noise, x, mask, y, f_pred_prob, f_pred, cost 410 | 411 | 412 | def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False): 413 | """ If you want to use a trained model, this is useful to compute 414 | the probabilities of new examples. 415 | """ 416 | n_samples = len(data[0]) 417 | probs = numpy.zeros((n_samples, 2)).astype(config.floatX) 418 | 419 | n_done = 0 420 | 421 | for _, valid_index in iterator: 422 | x, mask, y = prepare_data([data[0][t] for t in valid_index], 423 | numpy.array(data[1])[valid_index], 424 | maxlen=None) 425 | pred_probs = f_pred_prob(x, mask) 426 | probs[valid_index, :] = pred_probs 427 | 428 | n_done += len(valid_index) 429 | if verbose: 430 | print('%d/%d samples classified' % (n_done, n_samples)) 431 | 432 | return probs 433 | 434 | 435 | def pred_error(f_pred, prepare_data, data, iterator, verbose=False): 436 | """ 437 | Just compute the error 438 | f_pred: Theano fct computing the prediction 439 | prepare_data: usual prepare_data for that dataset. 440 | """ 441 | valid_err = 0 442 | for _, valid_index in iterator: 443 | x, mask, y = prepare_data([data[0][t] for t in valid_index], 444 | numpy.array(data[1])[valid_index], 445 | maxlen=None) 446 | preds = f_pred(x, mask) 447 | targets = numpy.array(data[1])[valid_index] 448 | valid_err += (preds == targets).sum() 449 | valid_err = 1. - numpy_floatX(valid_err) / len(data[0]) 450 | 451 | return valid_err 452 | 453 | 454 | def train_lstm( 455 | dim_proj=1024, # word embeding dimension and LSTM number of hidden units. 456 | 457 | # This value is suggested as being good in the EASGD paper, but 458 | # you may want to tune this 459 | train_len=10, # Train for this many minibatches when requested 460 | 461 | decay_c=0., # Weight decay for the classifier applied to the U weights. 462 | lrate=0.0001, # Learning rate for sgd (not used for adadelta and rmsprop) 463 | n_words=10000, # Vocabulary size 464 | optimizer=adadelta, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate). 465 | encoder='lstm', # TODO: can be removed must be lstm. 466 | saveto='lstm_model.npz', # The best model will be saved there 467 | maxlen=100, # Sequence longer then this get ignored 468 | batch_size=16, # The batch size during training. 469 | valid_batch_size=64, # The batch size used for validation/test set. 
470 | dataset='imdb', 471 | 472 | # Parameter for extra option 473 | noise_std=0., 474 | use_dropout=True, # if False slightly faster, but worst test error 475 | # This frequently need a bigger model. 476 | reload_model=None, # Path to a saved model we want to start from. 477 | test_size=-1, # If >0, we keep only this number of test example. 478 | valid_sync=False, 479 | param_sync_api=False 480 | ): 481 | 482 | # Model options 483 | model_options = locals().copy() 484 | print("model options", model_options) 485 | 486 | load_data, prepare_data = get_dataset('imdb') 487 | 488 | print('Loading data') 489 | train, valid, test = load_data(n_words=n_words, valid_portion=0.05, 490 | maxlen=maxlen) 491 | if test_size > 0: 492 | # The test set is sorted by size, but we want to keep random 493 | # size example. So we must select a random selection of the 494 | # examples. 495 | idx = numpy.arange(len(test[0])) 496 | numpy.random.shuffle(idx) 497 | idx = idx[:test_size] 498 | test = ([test[0][n] for n in idx], [test[1][n] for n in idx]) 499 | 500 | ydim = numpy.max(train[1]) + 1 501 | 502 | model_options['ydim'] = ydim 503 | 504 | print('Building model') 505 | # This create the initial parameters as numpy ndarrays. 506 | # Dict name (string) -> numpy ndarray 507 | params = init_params(model_options) 508 | 509 | if reload_model: 510 | load_params('lstm_model.npz', params) 511 | 512 | # This creates Theano Shared Variable from the parameters. 513 | # Dict name (string) -> Theano Tensor Shared Variable 514 | # params and tparams have different copy of the weights. 515 | tparams = init_tparams(params) 516 | 517 | list_tparams = list(tparams.values()) 518 | if param_sync_api: 519 | print("Using param_sync worker's interface!") 520 | worker.init_shared_params(list_tparams, param_sync_rule=EASGD(0.5)) 521 | else: 522 | print("Using all_reduce worker's interface!") 523 | from platoon.training import global_dynamics as gd 524 | cparams = init_tparams(params) 525 | list_cparams = list(cparams.values()) 526 | easgd = gd.EASGD(worker) 527 | easgd.make_rule(list_tparams, list_cparams, 0.5) 528 | print("Params init done") 529 | 530 | # use_noise is for dropout 531 | (use_noise, x, mask, 532 | y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options) 533 | 534 | if decay_c > 0.: 535 | decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c') 536 | weight_decay = 0. 
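        # Descriptive note (added): the L2 penalty covers only the classifier weights U; it is scaled by decay_c and added to the cost below.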
537 | weight_decay += (tparams['U'] ** 2).sum() 538 | weight_decay *= decay_c 539 | cost += weight_decay 540 | 541 | f_cost = theano.function([x, mask, y], cost, name='f_cost') 542 | 543 | grads = tensor.grad(cost, wrt=list_tparams) 544 | f_grad = theano.function([x, mask, y], grads, name='f_grad') 545 | 546 | lr = tensor.scalar(name='lr') 547 | f_grad_shared, f_update = optimizer(lr, tparams, grads, 548 | x, mask, y, cost) 549 | 550 | print('Optimization') 551 | 552 | kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) 553 | kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) 554 | 555 | def train_iter(): 556 | while True: 557 | kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) 558 | for _, train_index in kf: 559 | y = [train[1][t] for t in train_index] 560 | x = [train[0][t] for t in train_index] 561 | x, mask, y = prepare_data(x, y) 562 | yield x, mask, y 563 | 564 | train_it = train_iter() 565 | 566 | best_p = None 567 | 568 | # Making sure that the worker start training with the most recent params 569 | if param_sync_api: 570 | worker.copy_to_local() 571 | 572 | while True: 573 | step = worker.send_req('next') 574 | print(step) 575 | 576 | if step == 'train': 577 | use_noise.set_value(numpy_floatX(1.)) 578 | for i in range(train_len): 579 | x, mask, y = next(train_it) 580 | cost = f_grad_shared(x, mask, y) 581 | f_update(lrate) 582 | print('Train cost:', cost) 583 | step = worker.send_req('done', {'train_len': train_len}) 584 | 585 | print("Syncing with global params") 586 | if param_sync_api: 587 | worker.sync_params(synchronous=True) 588 | else: 589 | easgd() 590 | 591 | """ 592 | if step.startswith('save '): 593 | _, saveto = step.split(' ', 1) 594 | print 'Saving...', 595 | # TODO fix that shit so that saving works. 596 | numpy.savez(saveto, history_errs=history_errs, **s.params) 597 | pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1) 598 | print 'Done' 599 | """ 600 | 601 | if step == 'valid': 602 | if param_sync_api and valid_sync: 603 | worker.copy_to_local() 604 | use_noise.set_value(numpy_floatX(0.)) 605 | valid_err = pred_error(f_pred, prepare_data, valid, 606 | kf_valid) 607 | test_err = pred_error(f_pred, prepare_data, test, kf_test) 608 | res = worker.send_req('pred_errors', dict(test_err=float(test_err), 609 | valid_err=float(valid_err))) 610 | 611 | if res == 'best': 612 | best_p = unzip(tparams) 613 | 614 | print(('Valid ', valid_err, 615 | 'Test ', test_err)) 616 | if param_sync_api and valid_sync: 617 | worker.copy_to_local() 618 | 619 | if step == 'stop': 620 | break 621 | 622 | # Release all shared resources. 623 | worker.close() 624 | 625 | # FIX that shit later. 626 | """ 627 | if best_p is not None: 628 | zipp(best_p, tparams) 629 | else: 630 | best_p = unzip(tparams) 631 | 632 | use_noise.set_value(numpy_floatX(0.)) 633 | kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size) 634 | train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted) 635 | valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) 636 | test_err = pred_error(f_pred, prepare_data, test, kf_test) 637 | 638 | print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err 639 | if saveto: 640 | numpy.savez(saveto, train_err=train_err, 641 | valid_err=valid_err, test_err=test_err, 642 | history_errs=history_errs, **best_p) 643 | print 'The code run for %d epochs, with %f sec/epochs' % ( 644 | (eidx + 1), (end_time - start_time) / (1. 
* (eidx + 1))) 645 | print >> sys.stderr, ('Training took %.1fs' % 646 | (end_time - start_time)) 647 | return train_err, valid_err, test_err 648 | """ 649 | 650 | if __name__ == '__main__': 651 | # See function train for all possible parameter and there definition. 652 | parser = Worker.default_parser() 653 | parser.add_argument('--valid_sync', dest='valid_sync', action='store_true', default=False) 654 | parser.add_argument('--param-sync-api', action='store_true', default=False) 655 | args = parser.parse_args() 656 | 657 | worker = Worker(**Worker.default_arguments(args)) 658 | # Set the random number generators' seeds for consistency 659 | # Each worker **MUST** be seeded with a different number, so that 660 | # they do not draw the same minibatches! 661 | SEED = 123 662 | numpy.random.seed(SEED + worker.global_rank) 663 | 664 | train_lstm(valid_sync=args.valid_sync, test_size=500, 665 | param_sync_api=args.param_sync_api) 666 | --------------------------------------------------------------------------------